Beispiel #1
0
def test_subclass_param():
    """Exercise ``check.subclass_param`` and ``check.opt_subclass_param``.

    A genuine subclass must be accepted by both variants, the optional
    variant must hand back ``None`` when given ``None``, and both variants
    must raise ``CheckError`` for an unrelated class or a non-class value.
    """

    class Super(object):
        pass

    class Sub(Super):
        pass

    class Alone(object):
        pass

    # Inputs that are not subclasses of Super: an unrelated class and a
    # plain string.
    rejected_inputs = (Alone, "value")

    # Required variant.
    assert check.subclass_param(Sub, "foo", Super)

    for rejected in rejected_inputs:
        with pytest.raises(CheckError):
            assert check.subclass_param(rejected, "foo", Super)

    # Optional variant: same contract, but None is passed through.
    assert check.opt_subclass_param(Sub, "foo", Super)
    assert check.opt_subclass_param(None, "foo", Super) is None

    for rejected in rejected_inputs:
        with pytest.raises(CheckError):
            assert check.opt_subclass_param(rejected, "foo", Super)
Beispiel #2
0
    def execute_queries(
        self, queries, fetch_results=False, cursor_factory=None, error_callback=None
    ):
        '''Fake stand-in for execute_queries: logs each query and never touches a database.

        Every argument is validated, each query is logged at info level, and a canned result
        is returned when results are requested.

        Args:
            queries (List[str]): The queries to "execute"; each one is only logged.
            fetch_results (Optional[bool]): Whether to return a result. Defaults to False, in
                which case None is returned.
            cursor_factory (Optional[:py:class:`psycopg2.extensions.cursor`]): Validated but
                otherwise unused by this fake; defaults to None.
            error_callback (Optional[Callable[[Exception, Cursor, DagsterLogManager], None]]):
                Validated to be callable (or None) but never invoked by this fake.

        Returns:
            Optional[List[Any]]: ``[self.QUERY_RESULT] * 3`` when fetch_results is set,
                regardless of how many queries were passed. Otherwise None.
        '''
        check.list_param(queries, 'queries', of_type=str)
        check.bool_param(fetch_results, 'fetch_results')
        check.opt_subclass_param(cursor_factory, 'cursor_factory', psycopg2.extensions.cursor)
        check.opt_callable_param(error_callback, 'error_callback')

        for statement in queries:
            message = 'Executing query \'{query}\''.format(query=statement)
            self.log.info(message)

        if not fetch_results:
            return None

        # The canned result is always repeated three times, independent of len(queries).
        return [self.QUERY_RESULT] * 3
Beispiel #3
0
    def execute_query(self, query, fetch_results=False, cursor_factory=None):
        '''Run one query against Redshift synchronously.

        Rows come back as a list of tuples, one tuple per row; e.g. SELECT 1 yields [(1,)].

        Args:
            query (str): The query to execute.
            fetch_results (Optional[bool]): Whether to return the rows produced by the query.
                Defaults to False, in which case the query runs and nothing is fetched.
            cursor_factory (Optional[:py:class:`psycopg2.extensions.cursor`]): An alternative
                cursor_factory, forwarded to ``self._get_cursor``; defaults to None.

        Returns:
            Optional[List[Tuple[Any, ...]]]: The fetched rows when fetch_results is set and the
                cursor reports a positive rowcount. Otherwise None.
        '''
        check.str_param(query, 'query')
        check.bool_param(fetch_results, 'fetch_results')
        check.opt_subclass_param(cursor_factory, 'cursor_factory', psycopg2.extensions.cursor)

        with self._get_cursor(cursor_factory=cursor_factory) as cursor:
            # On Python 2 the query is encoded to UTF-8 bytes before it is logged and executed.
            running_on_py2 = sys.version_info[0] < 3
            if running_on_py2:
                query = query.encode('utf-8')

            self.log.info('Executing query \'{query}\''.format(query=query))
            cursor.execute(query)

            # rowcount is only consulted when the caller asked for results.
            should_fetch = fetch_results and cursor.rowcount > 0
            if not should_fetch:
                self.log.info('Empty result from query')
                return None

            return cursor.fetchall()
Beispiel #4
0
    def execute_query(self, query, fetch_results=False, cursor_factory=None, error_callback=None):
        '''Synchronously execute a single query against Redshift. Will return a list of rows, where
        each row is a tuple of values, e.g. SELECT 1 will return [(1,)].

        Args:
            query (str): The query to execute.
            fetch_results (Optional[bool]): Whether to return the results of executing the query.
                Defaults to False, in which case the query will be executed without retrieving the
                results.
            cursor_factory (Optional[:py:class:`psycopg2.extensions.cursor`]): An alternative
                cursor_factory; defaults to None. Will be used when constructing the cursor.
            error_callback (Optional[Callable[[Exception, Cursor, DagsterLogManager], None]]): A
                callback function, invoked when an exception is encountered during query execution;
                this is intended to support executing additional queries to provide diagnostic
                information, e.g. by querying ``stl_load_errors`` using ``pg_last_copy_id()``. If no
                function is provided, exceptions during query execution will be raised directly.

        Returns:
            Optional[List[Tuple[Any, ...]]]: Results of the query, as a list of tuples, when
                fetch_results is set and the cursor reports a positive rowcount. Otherwise return
                None. None is also returned when an exception was handed to error_callback and the
                callback itself did not raise.

        Raises:
            Exception: Whatever query execution raised, re-raised unchanged, when no
                error_callback is provided.
        '''
        check.str_param(query, 'query')
        check.bool_param(fetch_results, 'fetch_results')
        check.opt_subclass_param(cursor_factory, 'cursor_factory', psycopg2.extensions.cursor)
        check.opt_callable_param(error_callback, 'error_callback')

        with self._get_conn() as conn:
            with self._get_cursor(conn, cursor_factory=cursor_factory) as cursor:
                try:
                    # six.ensure_str returns the coerced value rather than mutating its
                    # argument, so the result has to be bound back to ``query``; otherwise the
                    # call is a no-op and a Python 2 unicode query is never converted to a
                    # native str.
                    query = six.ensure_str(query)

                    self.log.info('Executing query \'{query}\''.format(query=query))
                    cursor.execute(query)

                    if fetch_results and cursor.rowcount > 0:
                        return cursor.fetchall()
                    else:
                        self.log.info('Empty result from query')

                except Exception as e:  # pylint: disable=broad-except
                    # If autocommit is disabled or not set (it is disabled by default), Redshift
                    # will be in the middle of a transaction at exception time, and because of
                    # the failure the current transaction will not accept any further queries.
                    #
                    # This conn.commit() call closes the open transaction before handing off
                    # control to the error callback, so that the user can issue additional
                    # queries. Notably, for e.g. pg_last_copy_id() to work, it requires you to
                    # use the same conn/cursor, so you have to do this conn.commit() to ensure
                    # things are in a usable state in the error callback.
                    if not self.autocommit:
                        conn.commit()

                    if error_callback is not None:
                        # The callback owns the failure from here on; the exception is not
                        # re-raised, so the method falls through and returns None.
                        error_callback(e, cursor, self.log)
                    else:
                        raise
Beispiel #5
0
    def _get_cursor(self, conn, cursor_factory=None):
        '''Yield a cursor opened on ``conn``, committing afterwards unless autocommit is on.

        This is a generator that yields exactly once; presumably it is wrapped as a context
        manager by a decorator outside this view (TODO confirm).

        Args:
            conn: The open connection to create the cursor on. ``self.autocommit`` is applied
                to it first when that setting is not None.
            cursor_factory (Optional[:py:class:`psycopg2.extensions.cursor`]): An alternative
                cursor_factory passed to ``conn.cursor``; defaults to None.

        Yields:
            The cursor produced by ``conn.cursor(cursor_factory=cursor_factory)``.
        '''
        check.opt_subclass_param(cursor_factory, 'cursor_factory', psycopg2.extensions.cursor)

        # A value of None means "leave the connection's own default alone"; only an explicit
        # True/False is pushed onto the connection.
        if self.autocommit is not None:
            conn.autocommit = self.autocommit

        with conn:
            cursor_context = conn.cursor(cursor_factory=cursor_factory)
            with cursor_context as open_cursor:
                yield open_cursor

            # With autocommit on, every statement has already been committed as it ran. In all
            # other cases issue one final commit once the caller is done with the cursor.
            if not self.autocommit:
                conn.commit()
Beispiel #6
0
    def __init__(self,
                 name,
                 pipeline_dict=None,
                 pipeline_defs=None,
                 experimental=None):
        '''Validate the arguments and set up the pipeline and schedule registries.

        Args:
            name (str): Name stored on ``self._name``.
            pipeline_dict (Optional[Dict[str, Callable]]): Mapping with string keys whose
                values must all be callable. Stored as-is on ``self._lazy_pipeline_dict``; its
                keys seed the set of known pipeline names.
            pipeline_defs (Optional[List[PipelineDefinition]]): Already-built definitions.
                Each is cached by name; a name that collides with a ``pipeline_dict`` key or
                an earlier definition fails the invariant check.
            experimental (Optional[dict]): Experimental arguments. Reads the optional keys
                ``'scheduler'`` (a ``Scheduler`` subclass) and ``'schedule_defs'`` (a list of
                ``ScheduleDefinition``; duplicate names fail the invariant check).
        '''
        self._name = check.str_param(name, 'name')

        pipeline_dict = check.opt_dict_param(
            pipeline_dict, 'pipeline_dict', key_type=str)
        pipeline_defs = check.opt_list_param(
            pipeline_defs, 'pipeline_defs', PipelineDefinition)

        # Experimental arguments
        # TODO: Extract scheduler and scheduler_defs from RepositoryDefinition
        # https://github.com/dagster-io/dagster/issues/1693
        experimental = check.opt_dict_param(experimental, 'experimental')
        scheduler = check.opt_subclass_param(
            experimental.get('scheduler'), 'scheduler', Scheduler)
        schedule_defs = check.opt_list_param(
            experimental.get('schedule_defs'), 'schedule_defs', ScheduleDefinition)

        for pipeline_fn in pipeline_dict.values():
            check.is_callable(pipeline_fn, 'Value in pipeline_dict must be function')

        self._lazy_pipeline_dict = pipeline_dict

        # The locals below alias the very same objects as the attributes, so the loops
        # populate the instance state directly.
        cache = self._pipeline_cache = {}
        known_names = self._pipeline_names = set(pipeline_dict)
        for pipeline_def in pipeline_defs:
            check.invariant(
                pipeline_def.name not in known_names,
                'Duplicate pipelines named {name}'.format(name=pipeline_def.name),
            )
            known_names.add(pipeline_def.name)
            cache[pipeline_def.name] = pipeline_def

        self._scheduler_type = scheduler
        schedules = self._schedules = {}
        for schedule_def in schedule_defs:
            check.invariant(
                schedule_def.name not in schedules,
                'Duplicate schedules named {name}'.format(name=schedule_def.name),
            )
            schedules[schedule_def.name] = schedule_def

        # Both start as None here; nothing in this method fills them in.
        self._all_pipelines = None
        self._solid_defs = None