def __init__(self, executor_config=None):
    """Initialize parameters for the data job execution layer.

    Args:
        executor_config: a dictionary of configuration params that
            override the defaults. The key names should be the same as
            the field names of the corresponding xxxExecutor.Config class.
    """
    executor_config = executor_config if executor_config else {}
    self.log = get_logger(self.__class__.__name__)

    # Override self.config using the executor_config dict.
    self.config = self.Config()
    for key, value in executor_config.items():
        if key == 'USER_LIBJAR_DIRS':
            self.config.USER_LIBJAR_DIRS = value.split(',')
        else:
            setattr(self.config, key, value)

    # Construct HadoopHostConfig object according to the overridden config.
    self.hadoop_host_config = self._contruct_hadoop_host_config()
    self.job_ids = []
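# Usage sketch (hedged): `SomeExecutor` stands in for a concrete
# xxxExecutor subclass, and the config keys shown are illustrative, not a
# documented set. Note that USER_LIBJAR_DIRS is passed as a
# comma-separated string and split into a list by __init__ above.
#
# executor = SomeExecutor(executor_config={
#     'USER_LIBJAR_DIRS': '/mnt/jars/lib,/mnt/jars/aux',
#     'USER_APP_JAR': '/mnt/jars/app.jar',  # hypothetical Config field
# })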
import inspect
import pkgutil
import sys

from collections import defaultdict

from pinball_ext.common import utils


__author__ = 'Mao Ye, Changshu Liu'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


LOG = utils.get_logger('pinball_ext.common.import_utils')


class ModuleImport(object):
    """ModuleImport supports:

    1) importing classes/modules for the given import directories;
    2) retrieving a class for a given class name;
    3) retrieving all subclass names for a given parent class name.
    """

    def __init__(self, import_directories, base_class=object,
                 map_class_module=False):
        if not import_directories:
            raise Exception("import directories are missing")
        self._import_directories = import_directories
        if base_class:
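# Usage sketch (hedged): the constructor signature is shown above, but the
# retrieval methods are truncated from this excerpt, so the method names
# below are assumptions standing in for the real API.
#
# importer = ModuleImport(['pinball_ext/job'], base_class=JobBase)
# importer.import_all_modules()               # assumed method
# cls = importer.get_class_by_name('MyJob')   # assumed method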
""" import subprocess from pinball_ext.common.decorators import retry from pinball_ext.common.utils import get_logger __author__ = 'Dmitry Chechik, Changshu Liu' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_logger("pinball_ext.common.shell_utils") _DEFAULT_SSH_OPTS = [ '-o', 'UserKnownHostsFile=/dev/null', '-o', 'StrictHostKeyChecking=no', # Suppress warnings while SSHing. '-o', 'LogLevel=quiet', ] _DEFAULT_RSYNC_OPTS = [ '-u', # Skip files that are newer on the receiver. '-L', # Transform symlinks to real files. '-v', ]
import boto

from pinball_ext.common.utils import get_logger


__author__ = 'Mao Ye, Changshu Liu'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


AWS_ACCESS_KEY_ID = ''
AWS_SECRET_ACCESS_KEY = ''

if not boto.config.has_section('Boto'):
    boto.config.add_section('Boto')
boto.config.set('Boto', 'http_socket_timeout', '180')

LOG = get_logger('pinball_ext.common.s3_utils')


def config_s3_utils(aws_access_key_id, aws_secret_access_key):
    """Configure core parameters for the s3_utils module.

    Args:
        aws_access_key_id: first parameter passed to boto.connect_s3().
        aws_secret_access_key: second parameter passed to boto.connect_s3().

    Returns:
        None
    """
    assert aws_access_key_id
    assert aws_secret_access_key
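# Usage sketch: call once at startup, before any other s3_utils function.
# The credential values below are placeholders.
#
# config_s3_utils(aws_access_key_id='AKIA...',
#                 aws_secret_access_key='...')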
import fcntl
import optparse
import traceback

from pinball_ext.common import utils
from pinball_ext.job import job_module


__author__ = 'Changshu Liu, Mohammad Shahangian, Mao Ye'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


LOG = utils.get_logger('pinball_ext.job.job_runner')


def _acquire_exclusive_lock(write_lock_name):
    """Acquires a lock with a given name.

    Underneath we create a lock file and return a descriptor of that
    file. The lock will be held as long as this file descriptor is open.
    """
    lock_filename = '/var/lock/%s.lock' % write_lock_name
    LOG.info('Acquiring lock %s ...', lock_filename)
    lock_file = open(lock_filename, 'w')
    fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
    LOG.info('Acquired lock %s.', lock_filename)
    return lock_file
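# Usage sketch: hold the returned file object for the duration of the
# critical section; closing it releases the flock.
#
# lock_file = _acquire_exclusive_lock('my_job')
# try:
#     pass  # exclusive work goes here
# finally:
#     lock_file.close()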
# See the License for the specific language governing permissions and
# limitations under the License.

"""Various decorator classes."""

import sys
import time

from pinball_ext.common.utils import get_logger


__author__ = 'Mao Ye'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


LOG = get_logger('pinball_ext.common.decorators.retry')


def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=LOG,
          sleep_func=time.sleep, max_delay=sys.maxint):
    """Retry calling the decorated function using an exponential backoff.

    http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/
    original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry

    Args:
        ExceptionToCheck: exception to check. May be a tuple of exceptions
            to check.
        tries: an integer, number of times to try (not retry) before
            giving up.
        delay: an integer, initial delay between retries in seconds.
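# Usage sketch: retry a flaky call up to 4 tries total, sleeping 3s, 6s,
# then 12s between attempts. Parameters mirror the signature above.
#
# @retry(IOError, tries=4, delay=3, backoff=2)
# def fetch():
#     pass  # may raise IOError transiently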
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pinball_ext.common import utils
from pinball_ext.job.basic_jobs import ClusterJob


__author__ = 'Changshu Liu, Mohammad Shahangian, Mao Ye'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


LOG = utils.get_logger('pinball_ext.job.hadoop_jobs')


class HadoopJob(ClusterJob):
    """Base class for actual Hadoop jobs.

    The app jar and lib jars are configured in the executor; see
    Executor.run_hadoop_job() for details.

    A derived class should at least override _get_class_name() to specify
    the main Java class to execute. It can also optionally override
    _setup() to configure the following parameters to further tune the
    job config:
    - self.jobconf_args
    - self.extra_jars
    - self.extra_arguments
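# Derived-class sketch (hedged): only _get_class_name() is documented above
# as the required override. The Java class, jar path, and the assumption
# that jobconf_args is a dict and extra_jars a list are illustrative.
#
# class WordCountJob(HadoopJob):
#     def _get_class_name(self):
#         return 'com.example.hadoop.WordCount'
#
#     def _setup(self):
#         super(WordCountJob, self)._setup()
#         self.jobconf_args['mapreduce.job.reduces'] = '16'
#         self.extra_jars.append('/mnt/jars/extra.jar')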
from pinball_ext.common import hadoop_utils
from pinball_ext.common import utils
from pinball_ext.executor.common import make_executor
from pinball_ext.executor.common import Platform


__author__ = 'Mao Ye, Mohammad Shahangian, Changshu Liu'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


_SUCCESS_FILE = '_SUCCESS'
JOB_SLEEP_TIME_SEC = 60

LOG = utils.get_logger('pinball_ext.job.basic_jobs')


class JobBase(object):
    """Base class for all actual jobs.

    Each job takes a (possibly empty) dictionary of parameters that it
    parses and applies to its calculation.

    Args:
        params: key/value pairs for the params that the job will run with.

    To run a job, invoke the runjob() method, which decides how to call
    the following methods:
    - _setup(): responsible for constructing the arguments for the command
      line tool being invoked and doing any other prep work such as
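# Usage sketch (hedged): runjob() is named in the docstring above; the
# concrete job class and its params are placeholders, and the constructor
# is assumed to accept the params dict the docstring describes.
#
# job = MyJob(params={'dt': '2015-01-01'})
# job.runjob()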
"""Various decorator classes.""" import sys import time from pinball_ext.common.utils import get_logger __author__ = 'Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_logger('pinball_ext.common.decorators.retry') def retry(ExceptionToCheck, tries=4, delay=3, backoff=2, logger=LOG, sleep_func=time.sleep, max_delay=sys.maxint): """Retry calling the decorated function using an exponential backoff. http://www.saltycrane.com/blog/2009/11/trying-out-retry-decorator-python/ original from: http://wiki.python.org/moin/PythonDecoratorLibrary#Retry Args: ExceptionToCheck: exception to check. May be a tuple of exceptions to check. tries: an integer, number of times to try (not retry) before giving up. delay: an integer, initial delay between retries in seconds.
""" import fcntl import optparse import traceback from pinball_ext.common import utils from pinball_ext.job import job_module __author__ = 'Changshu Liu, Mohammad Shahangian, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = utils.get_logger('pinball_ext.job.job_runner') def _acquire_exclusive_lock(write_lock_name): """Acquires a lock with a given name. Underneath we create a lock file and return a descriptor of that file. The lock will be held as long as this file descriptor is open. """ lock_filename = '/var/lock/%s.lock' % write_lock_name LOG.info('Acquiring lock %s ...', lock_filename) lock_file = open(lock_filename, 'w') fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX) LOG.info('Acquired lock %s.', lock_filename) return lock_file
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from pinball_ext.common import utils
from pinball_ext.job.basic_jobs import ClusterJob


__author__ = 'Changshu Liu, Mao Ye, Mohammad Shahangian'
__copyright__ = 'Copyright 2015, Pinterest, Inc.'
__credits__ = [__author__]
__license__ = 'Apache'
__version__ = '2.0'


LOG = utils.get_logger('pinball_ext.job.hive_jobs')


class HiveJobBase(ClusterJob):
    """Base class for jobs that run a Hive query."""

    # If set to True, upload the archive; otherwise don't.
    _UPLOAD_ARCHIVE = False

    def _get_query_template(self):
        """Get the Hive query template as a string.

        The returned template may contain placeholder parameters that
        will be replaced with self.params.
        """
        raise NotImplementedError("No query template available in HiveJobBase")
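# Derived-class sketch (hedged): only _get_query_template() is documented
# above as the required override. The query text and the %(dt)s-style
# substitution keyed off self.params are illustrative assumptions.
#
# class DailyClicksJob(HiveJobBase):
#     def _get_query_template(self):
#         return "SELECT COUNT(*) FROM clicks WHERE dt = '%(dt)s'"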