Exemple #1
0
def test_row_str_and_repr():
    """Check the text and HTML representations of a two-column Row"""
    row = Row({'a': 1, 'b': 2})
    expected_text = '  a    b\n---  ---\n  1    2'

    # NOTE(review): ``in`` is a substring check, so an empty string would
    # also pass - confirm whether ``==`` was intended
    for render in (str, repr):
        assert render(row) in expected_text

    # parse html representation with pandas
    parsed = pd.read_html(row._repr_html_())[0]
    assert parsed.to_dict() == {'a': {0: 1}, 'b': {0: 2}}
Exemple #2
0
    def status(self, return_code_diff=False):
        """Summarize the current task status

        Parameters
        ----------
        return_code_diff : bool, optional
            If True and the product reports outdated code, add a
            'Code diff' entry obtained from the dag's differ. Defaults
            to False

        Returns
        -------
        Row
            Row with the entries 'name', 'Last updated',
            'Outdated dependencies', 'Outdated code', 'Product',
            'Doc (short)', 'Location' and, optionally, 'Code diff'
        """
        p = self.product

        data = {'name': self.name}

        if p.timestamp is not None:
            dt = datetime.fromtimestamp(p.timestamp)
            # absolute date plus a relative, human-friendly version
            date_h = dt.strftime('%b %d, %y at %H:%M')
            time_h = humanize.naturaltime(dt)
            data['Last updated'] = '{} ({})'.format(time_h, date_h)
        else:
            data['Last updated'] = 'Has not been run'

        data['Outdated dependencies'] = p._outdated_data_dependencies()
        outd_code = p._outdated_code_dependency()
        data['Outdated code'] = outd_code

        # the diff is only computed on request and when code is outdated
        if outd_code and return_code_diff:
            data['Code diff'] = self.dag.differ.get_diff(
                p.stored_source_code,
                self.source_code,
                language=self.source.language)

        data['Product'] = str(self.product)
        data['Doc (short)'] = self.source.doc_short
        data['Location'] = self.source.loc

        return Row(data)
Exemple #3
0
def test_table_auto_size(monkeypatch):
    """With column_width='auto' the widest rendered line must equal the
    number of columns reported by shutil.get_terminal_size
    """
    TerminalSize = namedtuple('TerminalSize', ['columns'])

    def widest_line(text):
        return max(len(line) for line in text.splitlines())

    monkeypatch.setattr(shutil, 'get_terminal_size', lambda: TerminalSize(80))

    wide_row = Row({'a': '1' * 60, 'b': '1' * 60})
    table = Table([wide_row, wide_row], column_width='auto')

    assert widest_line(str(table)) == 80

    # simulate resize
    monkeypatch.setattr(shutil, 'get_terminal_size', lambda: TerminalSize(120))
    assert widest_line(str(table)) == 120
Exemple #4
0
def test_table_str_and_repr(monkeypatch):
    """Check the text and HTML representations of a two-row Table"""
    # replace the shutil used by the table module with a stub that
    # reports a 6-column terminal
    fake_shutil = Mock()
    fake_shutil.get_terminal_size().columns = 6
    monkeypatch.setattr(table, 'shutil', fake_shutil)

    row = Row({'a': 1, 'b': 2})
    tbl = Table([row, row])
    expected_text = '  a    b\n---  ---\n  1    2\n  1    2'

    for render in (str, repr):
        assert render(tbl) == expected_text

    # parse html representation with pandas
    parsed = pd.read_html(tbl._repr_html_())[0]
    assert parsed.to_dict(orient='list') == {'a': [1, 1], 'b': [2, 2]}
Exemple #5
0
    def status(self, return_code_diff=False, sections=None):
        """Summarize the current task status

        Parameters
        ----------
        return_code_diff : bool, optional
            If True and the product reports outdated code, add a
            'Code diff' entry obtained from the dag's differ. This entry
            does not depend on ``sections``. Defaults to False
        sections : list, optional
            Sections to include. Defaults to "name", "last_run",
            "outdated", "product", "doc", "location". Also supported:
            "type", "status", "client", "outdated_dependencies",
            "outdated_code", "product_type", "product_client"

        Returns
        -------
        Row
            Row with one entry per requested section
        """
        sections = sections or [
            'name', 'last_run', 'outdated', 'product', 'doc', 'location'
        ]

        p = self.product

        data = {}

        if 'name' in sections:
            data['name'] = self.name

        if 'type' in sections:
            data['type'] = type(self).__name__

        if 'status' in sections:
            data['status'] = self.exec_status.name

        if 'client' in sections:
            # FIXME: all tasks should have a client property
            data['client'] = (repr(self.client)
                              if hasattr(self, 'client') else None)

        if 'last_run' in sections:
            if p.metadata.timestamp is not None:
                dt = datetime.fromtimestamp(p.metadata.timestamp)
                # absolute date plus a relative, human-friendly version
                date_h = dt.strftime('%b %d, %y at %H:%M')
                time_h = humanize.naturaltime(dt)
                data['Last run'] = '{} ({})'.format(time_h, date_h)
            else:
                data['Last run'] = 'Has not been run'

        # both checks always run, regardless of the requested sections
        outd_data = p._outdated_data_dependencies()
        outd_code = p._outdated_code_dependency()

        # human-readable list of outdated causes, False when up-to-date
        causes = []

        if outd_code:
            causes.append('Source code')

        if outd_data:
            causes.append('Upstream')

        outd = ' & '.join(causes) if causes else False

        if 'outdated' in sections:
            data['Outdated?'] = outd

        if 'outdated_dependencies' in sections:
            data['Outdated dependencies'] = outd_data

        if 'outdated_code' in sections:
            data['Outdated code'] = outd_code

        # the diff is only computed on request and when code is outdated
        if outd_code and return_code_diff:
            data['Code diff'] = (self.dag.differ.get_diff(
                p.metadata.stored_source_code,
                str(self.source),
                extension=self.source.extension))

        if 'product_type' in sections:
            data['Product type'] = type(self.product).__name__

        if 'product' in sections:
            data['Product'] = repr(self.product)

        if 'product_client' in sections:
            # FIXME: all products should have a client property
            data['Product client'] = (repr(self.product.client) if hasattr(
                self.product, 'client') else None)

        if 'doc' in sections:
            data['Doc (short)'] = _doc_short(self.source.doc)

        if 'location' in sections:
            data['Location'] = self.source.loc

        return Row(data)
Exemple #6
0
def test_convert_to_dict():
    """Table.to_dict maps each column name to the list of its values"""
    row = Row({'a': 1, 'b': 2})
    table = Table([row, row], column_width=None)

    expected = {'a': [1, 1], 'b': [2, 2]}
    assert table.to_dict() == expected
Exemple #7
0
def test_convert_to_pandas():
    """Table.to_pandas yields a DataFrame with one column per Row key"""
    row = Row({'a': 1, 'b': 2})
    table = Table([row, row], column_width=None)

    expected_df = pd.DataFrame({'a': [1, 1], 'b': [2, 2]})
    assert expected_df.equals(table.to_pandas())
Exemple #8
0
def test_create_build_report():
    """Two rows with equal elapsed time each get a 50 percent share"""
    timing = Row({'Elapsed (s)': 1})
    expected = {'Elapsed (s)': [1, 1], 'Percentage': [50, 50]}

    assert BuildReport([timing, timing]) == expected
Exemple #9
0
def test_table_values():
    """Table.values exposes the data as a column -> list mapping"""
    row = Row({'a': 1, 'b': 2})
    table = Table([row, row], column_width=None)

    expected = {'a': [1, 1], 'b': [2, 2]}
    assert table.values == expected
Exemple #10
0
def test_select_multiple_cols_in_table():
    """Indexing a Table with a list of names returns those columns"""
    row = Row({'a': 1, 'b': 2})
    table = Table([row, row], column_width=None)

    selected = table[['a', 'b']]
    assert selected == {'a': [1, 1], 'b': [2, 2]}
Exemple #11
0
def test_error_if_row_initialized_with_non_mapping():
    """Row must reject a list passed instead of a mapping"""
    not_a_mapping = []

    with pytest.raises(TypeError):
        Row(not_a_mapping)
Exemple #12
0
def test_select_multiple_cols_in_row():
    """Indexing a Row with a list of names returns those entries"""
    row = Row({'a': 1, 'b': 2})

    selected = row[['a', 'b']]
    assert selected == {'a': 1, 'b': 2}
Exemple #13
0
def test_table_wrap():
    """A fixed column_width caps the width of every rendered line"""
    row = Row({'a': 'abc d', 'b': 'abc d'})
    rendered = str(Table([row, row], column_width=3))

    # Max expected length: 3 (col a) + 2 (whitespace) + 3 (col b) = 8
    widest = max(len(line) for line in rendered.splitlines())
    assert widest == 8
Exemple #14
0
def test_table_iter():
    """Iterating over a Table yields its column names"""
    row = Row({'a': 1, 'b': 2})
    table = Table([row, row])

    column_names = set(iter(table))
    assert column_names == {'a', 'b'}
Exemple #15
0
def test_row_str_setitem():
    """Assigning to an existing key replaces the stored value"""
    row = Row({'a': 1, 'b': 2})
    new_value = 10

    row['a'] = new_value

    assert row['a'] == new_value
Exemple #16
0
    def build(self, force=False):
        """Run the task if needed by checking its dependencies

        Parameters
        ----------
        force : bool, optional
            If True, run without checking the product nor its
            dependencies. Defaults to False

        Returns
        -------
        self
            The task itself. A Row with keys 'name', 'Ran?' and
            'Elapsed (s)' is stored in ``self.build_report``

        Raises
        ------
        TaskBuildError
            If the product does not exist after a successful run or if
            the on_finish callback raises
        Exception
            Any exception raised by ``self.run()`` is re-raised after
            calling the on_failure callback (if any)
        """
        # TODO: if this is run in a task that has upstream dependencies
        # it will fail with a useless error since self.params does not have
        # upstream yet (added after rendering)

        # NOTE: should i fetch metadata here? I need to make sure I have
        # the latest before building

        self._logger.info(f'-----\nChecking {repr(self)}....')

        # do not run unless some of the conditions below match...
        run = False
        elapsed = 0

        if force:
            self._logger.info('Forcing run, skipping checks...')
            run = True
        else:
            # not forcing, need to check dependencies...
            p_exists = self.product.exists()

            # check dependencies only if the product exists and there is
            # metadata
            if p_exists and self.product.metadata is not None:

                outdated_data_deps = self.product._outdated_data_dependencies()
                outdated_code_dep = self.product._outdated_code_dependency()

                self._logger.info('Checking dependencies...')

                if outdated_data_deps:
                    run = True
                    self._logger.info('Outdated data deps...')
                else:
                    self._logger.info('Up-to-date data deps...')

                if outdated_code_dep:
                    run = True
                    self._logger.info('Outdated code dep...')
                else:
                    self._logger.info('Up-to-date code dep...')
            else:
                run = True

                # just log why it will run
                if not p_exists:
                    self._logger.info('Product does not exist...')

                if self.product.metadata is None:
                    self._logger.info('Product metadata is None...')

                self._logger.info('Running...')

        if run:
            self._logger.info(f'Starting execution: {repr(self)}')

            then = datetime.now()

            try:
                self.run()
            except Exception:
                tb = traceback.format_exc()

                if self.on_failure:
                    # a failing callback must not hide the original error,
                    # so it is only logged
                    try:
                        self.on_failure(self, tb)
                    except Exception:
                        self._logger.exception('Error executing on_failure '
                                               'callback')
                # re-raise the exception from self.run() untouched
                raise

            now = datetime.now()
            elapsed = (now - then).total_seconds()
            self._logger.info(f'Done. Operation took {elapsed:.1f} seconds')

            # update metadata
            self.product.timestamp = datetime.now().timestamp()
            self.product.stored_source_code = self.source_code
            self.product.save_metadata()

            # TODO: also check that the Products were updated:
            # if they did not exist, they must exist now, if they already
            # exist, timestamp must be recent equal to the datetime.now()
            # used. maybe run fetch metadata again and validate?

            if not self.product.exists():
                raise TaskBuildError(f'Error building task "{self}": '
                                     'the task ran successfully but product '
                                     f'"{self.product}" does not exist yet '
                                     '(task.product.exist() returned False)')

            if self.on_finish:
                try:
                    # the callback may optionally request the task's client
                    if 'client' in inspect.getfullargspec(self.on_finish).args:
                        self.on_finish(self, client=self.client)
                    else:
                        self.on_finish(self)

                except Exception as e:
                    # chain explicitly so the callback error is the cause
                    raise TaskBuildError('Exception when running on_finish '
                                         'for task {}: {}'.format(self, e)
                                         ) from e

        else:
            self._logger.info(f'No need to run {repr(self)}')

        self._logger.info('-----\n')

        self._status = TaskStatus.Executed

        for t in self._get_downstream():
            t._update_status()

        self.build_report = Row({'name': self.name, 'Ran?': run,
                                 'Elapsed (s)': elapsed, })

        return self
Exemple #17
0
def test_select_col_in_table():
    """Indexing a Table with a single name returns that column's values"""
    row = Row({'a': 1, 'b': 2})
    table = Table([row, row], column_width=None)

    column_a = table['a']
    assert column_a == [1, 1]
Exemple #18
0
def test_rows2columns():
    """rows2columns gathers the values of each key across rows"""
    rows = [Row({'a': 1}), Row({'a': 2})]

    columns = rows2columns(rows)

    assert columns == {'a': [1, 2]}