Example #1
    def __init__(self, executor_config=None):
        """ Initialize parameters for the data job execution layer.

        Args:
            executor_config: a dictionary of configuration params that override
            the defaults. The key names should match the field names of the
            corresponding xxxExecutor.Config class.
        """
        executor_config = executor_config or {}

        self.log = get_logger(self.__class__.__name__)

        # Override self.config using executor_config dict.
        self.config = self.Config()
        for key, value in executor_config.items():
            if key == 'USER_LIBJAR_DIRS':
                self.config.USER_LIBJAR_DIRS = value.split(',')
            else:
                setattr(self.config, key, value)

        # Construct HadoopHostConfig object according to overridden config.
        self.hadoop_host_config = self._contruct_hadoop_host_config()

        self.job_ids = []
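
A minimal standalone sketch of the override pattern above. _DemoExecutor and
its Config fields are illustrative assumptions, not part of pinball_ext:

class _DemoExecutor(object):
    # Stand-in mirroring the snippet: a nested Config class holds the
    # defaults, and executor_config overrides them by field name.
    class Config(object):
        USER_LIBJAR_DIRS = ['/usr/lib/jars']
        NUM_REDUCERS = 50

    def __init__(self, executor_config=None):
        executor_config = executor_config or {}
        self.config = self.Config()
        for key, value in executor_config.items():
            if key == 'USER_LIBJAR_DIRS':
                # A comma-separated string becomes a list of directories.
                self.config.USER_LIBJAR_DIRS = value.split(',')
            else:
                setattr(self.config, key, value)

executor = _DemoExecutor({'USER_LIBJAR_DIRS': '/a/jars,/b/jars',
                          'NUM_REDUCERS': 200})
assert executor.config.USER_LIBJAR_DIRS == ['/a/jars', '/b/jars']
assert executor.config.NUM_REDUCERS == 200
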
Example #2
import inspect
import pkgutil
import sys

from collections import defaultdict
from pinball_ext.common import utils


__author__ = 'Mao Ye, Changshu Liu'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


LOG = utils.get_logger('pinball_ext.common.import_utils')


class ModuleImport(object):
    """ModuleImport supports:
    1) import classes/modules from the given import directories
    2) retrieve a class for a given class_name
    3) retrieve all subclass names for a given parent class name
    """

    def __init__(self, import_directories,
                 base_class=object, map_class_module=False):
        if not import_directories:
            raise Exception("import directories are missing")
        self._import_directories = import_directories
        if base_class:
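
The excerpt cuts off above. As a rough standalone sketch of the technique the
docstring describes (find_subclasses is a hypothetical helper, not the
ModuleImport API):

import importlib
import inspect
import pkgutil

def find_subclasses(package_name, base_class):
    """Walk a package, import its modules, and index subclasses by name."""
    found = {}
    package = importlib.import_module(package_name)
    for _, mod_name, _ in pkgutil.walk_packages(package.__path__,
                                                package_name + '.'):
        module = importlib.import_module(mod_name)
        for name, obj in inspect.getmembers(module, inspect.isclass):
            if issubclass(obj, base_class) and obj is not base_class:
                found[name] = obj
    return found
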
Example #3

import subprocess

from pinball_ext.common.decorators import retry
from pinball_ext.common.utils import get_logger


__author__ = 'Dmitry Chechik, Changshu Liu'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


LOG = get_logger("pinball_ext.common.shell_utils")

_DEFAULT_SSH_OPTS = [
    '-o', 'UserKnownHostsFile=/dev/null',
    '-o', 'StrictHostKeyChecking=no',
    # Suppress warnings while SSHing.
    '-o', 'LogLevel=quiet',
]

_DEFAULT_RSYNC_OPTS = [
    '-u',  # Skip files that are newer on the receiver.
    '-L',  # Transform symlinks to real files.
    '-v',  # Verbose output.
]

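A sketch of how option lists like these are typically spliced into a command
line. run_ssh is a hypothetical helper (not shown in this excerpt) and relies
on the subprocess import and _DEFAULT_SSH_OPTS constant above:

def run_ssh(host, command, user='root'):
    # Build: ssh <default opts> user@host "command", capturing stdout.
    args = ['ssh'] + _DEFAULT_SSH_OPTS + ['%s@%s' % (user, host), command]
    return subprocess.check_output(args)
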
Example #4
import boto

from pinball_ext.common.utils import get_logger

__author__ = 'Mao Ye, Changshu Liu'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''
if not boto.config.has_section('Boto'):
    boto.config.add_section('Boto')
    boto.config.set('Boto', 'http_socket_timeout', '180')

LOG = get_logger('pinball_ext.common.s3_utils')


def config_s3_utils(aws_access_key_id, aws_secret_access_key):
    """Configure core parameters for the s3_utils module.

    Args:
        aws_access_key_id: first parameter passed to boto.connect_s3()
        aws_secret_access_key: second parameter passed to boto.connect_s3()

    Returns:
        None
    """
    assert aws_access_key_id
    assert aws_secret_access_key
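
A sketch of where the credentials flow, based only on the docstring's note
that they are passed to boto.connect_s3() (the helper name is an assumption):

def _get_s3_connection():
    # Module-level globals hold the configured credentials; the docstring
    # above says they are the first two arguments to boto.connect_s3().
    return boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
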
Example #5
import fcntl
import optparse
import traceback

from pinball_ext.common import utils
from pinball_ext.job import job_module


__author__ = 'Changshu Liu, Mohammad Shahangian, Mao Ye'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


LOG = utils.get_logger('pinball_ext.job.job_runner')


def _acquire_exclusive_lock(write_lock_name):
    """Acquires a lock with a given name.

    Underneath we create a lock file and return a descriptor of that file. The
    lock will be held as long as this file descriptor is open.
    """
    lock_filename = '/var/lock/%s.lock' % write_lock_name
    LOG.info('Acquiring lock %s ...', lock_filename)

    lock_file = open(lock_filename, 'w')
    fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
    LOG.info('Acquired lock %s.', lock_filename)
    return lock_file
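
A usage sketch for the helper above; the critical section is a placeholder:

lock_file = _acquire_exclusive_lock('my_job')
try:
    pass  # ... only one process at a time runs this block ...
finally:
    # Closing the descriptor releases the flock.
    lock_file.close()
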
Example #6
# See the License for the specific language governing permissions and
# limitations under the License.
"""Various decorator classes."""

import sys
import time

from pinball_ext.common.utils import get_logger

__author__ = 'Mao Ye'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'

LOG = get_logger('pinball_ext.common.decorators.retry')


def retry(ExceptionToCheck,
          tries=4,
          delay=3,
          backoff=2,
          logger=LOG,
          sleep_func=time.sleep,
          max_delay=sys.maxint):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    Args:
        ExceptionToCheck: exception to check. May be a tuple of
            exceptions to check.
        tries: an integer, number of times to try (not retry) before
            giving up.
        delay: an integer, initial delay between retries in seconds.
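
A usage sketch for the decorator (the flaky function is illustrative; with
tries=4, delay=3 and backoff=2, waits of 3s, 6s and 12s would separate the
attempts):

@retry(IOError, tries=4, delay=3, backoff=2)
def flaky_fetch():
    # Transient failures raise IOError and are retried with backoff.
    return open('/var/run/not_ready_yet').read()
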
Example #7
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pinball_ext.common import utils
from pinball_ext.job.basic_jobs import ClusterJob


__author__ = 'Changshu Liu, Mohammad Shahangian, Mao Ye'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


LOG = utils.get_logger('pinball_ext.job.hadoop_jobs')


class HadoopJob(ClusterJob):
    """Base class for actual Hadoop jobs.

    The app jar and lib jars are configured in the executor; see
    Executor.run_hadoop_job() for details.

    A derived class should at least override _get_class_name() to specify the
    main Java class to execute. It can also optionally override _setup() to
    configure the following parameters and further tune the job config:
    - self.jobconf_args
    - self.extra_jars
    - self.extra_arguments
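
A hedged sketch of a derived job per the docstring's contract; the class name,
Java class, and the shapes of the tuning attributes are assumptions:

class WordCountJob(HadoopJob):
    def _get_class_name(self):
        # The main Java class handed to the Hadoop launcher.
        return 'com.example.hadoop.WordCount'

    def _setup(self):
        super(WordCountJob, self)._setup()
        # Optional tuning hooks named in the docstring above.
        self.jobconf_args = {'mapreduce.job.reduces': '10'}
        self.extra_arguments = ['--input', '/data/in']
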
Example #8
from pinball_ext.common import hadoop_utils
from pinball_ext.common import utils
from pinball_ext.executor.common import make_executor
from pinball_ext.executor.common import Platform


__author__ = 'Mao Ye, Mohammad Shahangian, Changshu Liu'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


_SUCCESS_FILE = '_SUCCESS'
JOB_SLEEP_TIME_SEC = 60
LOG = utils.get_logger('pinball_ext.job.basic_jobs')


class JobBase(object):
    """Base class for all actual Jobs.

    Every actual Job takes a (possibly empty) dictionary of parameters that
    it parses and applies to its computation.

    Args:
        params: key/value pairs the job will run with.

    To run a Job, invoke the runjob() method, which decides how to call
    the following methods:
        - _setup(): responsible for constructing the arguments for the command
            line tool being invoked and doing any other prep work such as
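
A standalone sketch of the lifecycle described above; _DemoJob is a stand-in,
not the real JobBase:

class _DemoJob(object):
    def __init__(self, params=None):
        # A (possibly empty) dict of params the job parses and applies.
        self.params = params or {}

    def _setup(self):
        # Prep work: construct the command-line arguments from the params.
        self.arguments = ['--dt=%s' % self.params.get('dt', '')]

    def runjob(self):
        # Drives the lifecycle: _setup() first, then the invocation.
        self._setup()
        return self.arguments

assert _DemoJob({'dt': '2015-01-01'}).runjob() == ['--dt=2015-01-01']
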
Example #9
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from pinball_ext.common import utils
from pinball_ext.job.basic_jobs import ClusterJob

__author__ = 'Changshu Liu, Mao Ye, Mohammad Shahangian'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'

LOG = utils.get_logger('pinball_ext.job.hive_jobs')


class HiveJobBase(ClusterJob):
    """Base class for jobs that run Hive query."""

    # If True, upload the archive; otherwise skip the upload.
    _UPLOAD_ARCHIVE = False

    def _get_query_template(self):
        """Get the Hive query template as a string.

        The returned template may contain placeholder parameters that will be
        replaced with values from self.params.
        """
        raise NotImplementedError("No query template available in HiveJobBase")
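
A hedged sketch of a concrete subclass; the query and the %(...)s placeholder
style are assumptions about how self.params is substituted:

class DailyActiveUsersJob(HiveJobBase):
    def _get_query_template(self):
        # Placeholders are filled from self.params before the query runs.
        return ("SELECT COUNT(DISTINCT user_id) "
                "FROM events WHERE dt = '%(dt)s'")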