# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import json import time from threading import Thread from pinball.config.utils import get_log from pinball.ui.data_builder import DataBuilder LOG = get_log('pinball.ui.cache_thread') __author__ = 'Julia Oh, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Julia Oh', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' def start_cache_thread(dbstore): """Creates and starts a daemon thread for workflow data computation. This method is called when pinball ui server starts. Args: dbstore: The store to retrieve runs status.
from pinball.ui.data import Status from pinball.ui.data_builder import DataBuilder from pinball.workflow.name import Name from pinball.workflow.signaller import Signal from pinball.workflow.signaller import Signaller from pinball.workflow.utils import load_path __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Pawel Garbacki', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.scheduler.schedule') class Schedule(TokenData): """Parent class for specialized schedule types.""" __metaclass__ = abc.ABCMeta def __init__(self, next_run_time=None, recurrence_seconds=None, overrun_policy=OverrunPolicy.SKIP): self.next_run_time = next_run_time self.recurrence_seconds = recurrence_seconds self.overrun_policy = overrun_policy def advance_next_run_time(self): """Advance the scheduled run time beyond the current time.""" now = time.time()
from pinball.master.thrift_lib.ttypes import Query from pinball.master.thrift_lib.ttypes import QueryRequest from pinball.master.thrift_lib.ttypes import TokenMasterException from pinball.workflow.name import Name from pinball.workflow.signaller import Signal __author__ = "Pawel Garbacki" __copyright__ = "Copyright 2015, Pinterest, Inc." __credits__ = [__author__] __license__ = "Apache" __version__ = "2.0" LOG = get_log("pinball.workflow.archiver") class Archiver(object): def __init__(self, client, workflow, instance): self._client = client self._workflow = workflow self._instance = instance def _get_instance_tokens(self): """Retrieve all workflow instance tokens. Returns: List of tokens in the workflow instance. """ prefix = Name(workflow=self._workflow, instance=self._instance)
import smtplib from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from pinball.config.pinball_config import PinballConfig from pinball.config.utils import get_log from pinball.config.utils import timestamp_to_str from pinball.ui.data import Status __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Pawel Garbacki', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.workflow.emailer') class Emailer(object): """Send emails representing certain events.""" def __init__(self, ui_host, ui_port): self._ui_host = ui_host self._ui_port = ui_port def _send_message(self, subject, to, text, html): """Send a message through local SMTP server. Args: subject: The subject of the email message. to: The list of recipient email addresses. text: The email body in text format.
from pinball.persistence.token_data import TokenData from pinball.workflow import log_saver from pinball.workflow.job import ShellConditionJob from pinball.workflow.job import ShellJob from pinball.workflow.buffered_line_reader import BufferedLineReader from pinball.workflow.utils import get_logs_dir __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.workflow.job_executor') class ExecutionRecord(TokenData): """A data object holding information about a single job execution.""" def __init__(self, info=None, instance=None, start_time=None, end_time=None, exit_code=None, logs=None): self.info = info self.instance = instance self.start_time = start_time self.end_time = end_time self.exit_code = exit_code self.events = [] # TODO(pawel): rename this to attributes for naming consistency. self.properties = {} self.cleanup_exit_code = None
from django.core import management from pinball.config.pinball_config import PinballConfig from pinball.config.utils import get_log from pinball.config.utils import master_name from pinball.master.factory import Factory from pinball.ui import cache_thread __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Pawel Garbacki', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.run_pinball') DbStore = None Scheduler = None Emailer = None Worker = None def _pinball_imports(): """Import Pinball modules. The reason why these imports are not at the top level is that some of the imported code (db models initializing table names) depends on parameters passed on the command line (master name). Those imports need to be delayed until after command line parameter parsing. """
from pinball.ui.cache_thread import get_workflows_json from pinball.ui.data_builder import DataBuilder from pinball.ui.utils import get_workflow_jobs_from_parser_by_web_viewer from pinball.ui.workflow_graph import WorkflowGraph from pinball.persistence.store import DbStore from pinball.workflow.signaller import Signal __author__ = 'Pawel Garbacki' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.ui.views') # Custom message level SIGNIN = 35 def _serialize(elements): elements_list = [] for element in elements: elements_list.append(element.format()) to_serialize = {'aaData': elements_list} return json.dumps(to_serialize) def workflows(_): try: workflows_json = get_workflows_json()
close_connection = db.close_connection except AttributeError: # close_connection() was removed in Django 1.8 from django.db import transaction def close_connection(): db.connection.close() __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Pawel Garbacki', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.persistence.store') class Store(object): """An interface for persistent token containers.""" __metaclass__ = abc.ABCMeta def __init__(self): self.initialize() @abc.abstractmethod def initialize(self): """Initialize the token store.""" return @abc.abstractmethod
from pinball.master.thrift_lib.ttypes import ArchiveRequest from pinball.master.thrift_lib.ttypes import GroupRequest from pinball.master.thrift_lib.ttypes import ModifyRequest from pinball.master.thrift_lib.ttypes import QueryAndOwnRequest from pinball.master.thrift_lib.ttypes import QueryRequest from pinball.master.thrift_lib import TokenMasterService __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Pawel Garbacki', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.master.client') class Client(object): """Interface of a client communicating with token master.""" __metaclass__ = abc.ABCMeta def __init__(self): # Mapping from request class to the end point that handles requests of # this type. self._request_to_end_point = None def call(self, request): return self._request_to_end_point[request.__class__](request) # For description of individual methods, see master.thrift.
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Logic handling log read/write.""" import abc import os import time from pinball.common import s3_utils from pinball.config.pinball_config import PinballConfig from pinball.config.utils import get_log LOG = get_log('pinball.workflow.log_saver') __author__ = 'Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' class LogSaver(object): """Interface of a component reading and writing job execution logs.""" __metaclass__ = abc.ABCMeta @abc.abstractmethod def write(self, content_str): """Write content_str to the file.
from thrift.server import TServer from thrift.transport import TSocket from thrift.transport import TTransport from pinball.config.utils import get_log from pinball.master.client import LocalClient, RemoteClient from pinball.master.master_handler import MasterHandler from pinball.master.thrift_lib.TokenMasterService import Processor __author__ = 'Pawel Garbacki' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.master.factory') class Factory(object): """Factory creating token master and clients.""" def __init__(self, master_hostname=None, master_port=None): """Create a factory. Args: master_hostname: Hostname of the master server. Not required if master is running locally. Defaults to the name of the local host. master_port: Port of the master server. Not required if master is running locally. """ self._master_handler = None
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import json import time from threading import Thread from pinball.config.utils import get_log from pinball.ui.data_builder import DataBuilder LOG = get_log('pinball.ui.cache_thread') __author__ = 'Julia Oh, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Julia Oh', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' def start_cache_thread(dbstore): """Creates and starts a daemon thread for workflow data computation. This method is called when pinball ui server starts. Args:
from django.db import transaction from pinball.config.utils import get_log from pinball.persistence.models import ActiveTokenModel from pinball.persistence.models import ArchivedTokenModel from pinball.persistence.models import CachedDataModel __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Pawel Garbacki', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.persistence.store') class Store(object): """An interface for persistent token containers.""" __metaclass__ = abc.ABCMeta def __init__(self): self.initialize() @abc.abstractmethod def initialize(self): """Initialize the token store.""" return @abc.abstractmethod
from pinball.ui.data import TokenData from pinball.ui.data import TokenPathData from pinball.ui.data import WorkflowScheduleData from pinball.ui.data import WorkflowData from pinball.ui.data import WorkflowInstanceData from pinball.workflow import log_saver from pinball.workflow.name import Name from pinball.workflow.signaller import Signal __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Pawel Garbacki', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.ui.data_builder') class DataBuilder(object): # TODO(pawel): change use_cache default to True after we gain enough # confidence that it is bug free. def __init__(self, store, use_cache=False): self._store = store self.use_cache = use_cache @staticmethod def _parse_job_token_name(token_name): name = Name.from_job_token_name(token_name) if name.workflow: return name return None
from pinball.ui.cache_thread import get_workflows_json from pinball.ui.data_builder import DataBuilder from pinball.ui.utils import get_workflow_jobs_from_parser from pinball.ui.workflow_graph import WorkflowGraph from pinball.persistence.store import DbStore from pinball.workflow.signaller import Signal __author__ = 'Pawel Garbacki' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.ui.views') # Custom message level SIGNIN = 35 def _serialize(elements): elements_list = [] for element in elements: elements_list.append(element.format()) to_serialize = {'aaData': elements_list} return json.dumps(to_serialize) def workflows(_): try: workflows_json = get_workflows_json()
from pinball.config.utils import get_log from pinball.config.utils import timestamp_to_str from pinball.persistence.token_data import TokenData from pinball.workflow import log_saver from pinball.workflow.job import ShellConditionJob from pinball.workflow.job import ShellJob from pinball.workflow.buffered_line_reader import BufferedLineReader from pinball.workflow.utils import get_logs_dir __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.workflow.job_executor') class ExecutionRecord(TokenData): """A data object holding information about a single job execution.""" def __init__(self, info=None, instance=None, start_time=None, end_time=None, exit_code=None, logs=None): self.info = info self.instance = instance self.start_time = start_time self.end_time = end_time
from pinball.persistence.token_data import TokenData from pinball.scheduler.overrun_policy import OverrunPolicy from pinball.ui.data import Status from pinball.ui.data_builder import DataBuilder from pinball.workflow.name import Name from pinball.workflow.signaller import Signal from pinball.workflow.signaller import Signaller from pinball.workflow.utils import load_path __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Pawel Garbacki', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.scheduler.schedule') class Schedule(TokenData): """Parent class for specialized schedule types.""" __metaclass__ = abc.ABCMeta def __init__(self, next_run_time=None, recurrence_seconds=None, overrun_policy=OverrunPolicy.SKIP): self.next_run_time = next_run_time self.recurrence_seconds = recurrence_seconds self.overrun_policy = overrun_policy def advance_next_run_time(self):
from thrift.transport import TTransport from pinball.config.utils import get_log from pinball.master.client import LocalClient, RemoteClient from pinball.master.master_handler import MasterHandler from pinball.master.thrift_lib.TokenMasterService import Processor __author__ = 'Pawel Garbacki' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.master.factory') class Factory(object): """Factory creating token master and clients.""" def __init__(self, master_hostname=None, master_port=None): """Create a factory. Args: master_hostname: Hostname of the master server. Not required if master is running locally. Defaults to the name of the local host. master_port: Port of the master server. Not required if master is running locally. """
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Logic handling log read/write.""" import abc import os import time from pinball.common import s3_utils from pinball.config.pinball_config import PinballConfig from pinball.config.utils import get_log LOG = get_log('pinball.workflow.log_saver') __author__ = 'Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' class LogSaver(object): """Interface of a component reading and writing job execution logs.""" __metaclass__ = abc.ABCMeta @abc.abstractmethod def write(self, content_str):
"""Implementation of the token master logic.""" import pytrie import sys import threading from pinball.config.utils import get_log from pinball.master.blessed_version import BlessedVersion from pinball.master.transaction import REQUEST_TO_TRANSACTION __author__ = 'Pawel Garbacki' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.master.master_handler') class MasterHandler(object): """Handler implementing the token master logic. Tokens are stored in a trie where keys are token names while the values are the tokens themselves. Trie structure provides an efficient access to operations on token name prefixes such as token querying and counting. A special type of singleton token - called the blessed version - is stored in the tree with other tokens. The blessed version is used to generate unique version numbers. """ _BLESSED_VERSION = '/__BLESSED_VERSION__' _MASTER_OWNER = '__master__'
from pinball.master.thrift_lib.ttypes import ModifyResponse from pinball.master.thrift_lib.ttypes import QueryAndOwnRequest from pinball.master.thrift_lib.ttypes import QueryAndOwnResponse from pinball.master.thrift_lib.ttypes import QueryRequest from pinball.master.thrift_lib.ttypes import QueryResponse from pinball.master.thrift_lib.ttypes import TokenMasterException __author__ = 'Pawel Garbacki' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.master.transaction') class Transaction(object): """Interface defining a transaction on a token trie.""" __metaclass__ = abc.ABCMeta def __init__(self): self._updates = [] self._deletes = [] self._committed = False self._blessed_version = None self._store = None self._trie = None @abc.abstractmethod
job).""" import abc from pinball.config.utils import get_log from pinball.persistence.token_data import TokenData from pinball.workflow.name import Name __author__ = 'Pawel Garbacki' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.workflow.worker') class Job(TokenData): """Parent class for specialized job types.""" __metaclass__ = abc.ABCMeta IS_CONDITION = False def __init__(self, name=None, inputs=None, outputs=None, emails=None, max_attempts=1, retry_delay_sec=0, warn_timeout_sec=None, abort_timeout_sec=None): self.name = name self.inputs = inputs if inputs is not None else [] self.outputs = outputs if outputs is not None else [] self.emails = emails if emails is not None else []
Job object describes job inputs, outputs, and all information required to execute a job (e.g., a command line of a shell job or class name of a data job).""" import abc from pinball.config.utils import get_log from pinball.persistence.token_data import TokenData from pinball.workflow.name import Name __author__ = 'Pawel Garbacki' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.workflow.worker') class Job(TokenData): """Parent class for specialized job types.""" __metaclass__ = abc.ABCMeta IS_CONDITION = False def __init__(self, name=None, inputs=None, outputs=None, emails=None, max_attempts=1, retry_delay_sec=0,
from pinball.config.pinball_config import PinballConfig from pinball.config.utils import get_log __author__ = 'Mao Ye, Changshu Liu' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Mao Ye', 'Changshu Liu'] __license__ = 'Apache' __version__ = '2.0' if not boto.config.has_section('Boto'): boto.config.add_section('Boto') boto.config.set('Boto', 'http_socket_timeout', '180') LOG = get_log('pinball.common.s3_utils') def parse_s3_location(s3_location): """Parse s3_location to get the bucket name and the rest of the file path. Args: s3_location: A string in the form of: 's3n://<bucket_name>/<rest_of_the_file_path>'. Returns: bucket_name, rest_of_the_file_path """ try: regex = r'\s*s3n://(.+?)/(.+)' return re.match(regex, s3_location).groups()
from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from pinball.config.pinball_config import PinballConfig from pinball.config.utils import get_log from pinball.config.utils import timestamp_to_str from pinball.ui.data import Status __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Pawel Garbacki', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.workflow.emailer') class Emailer(object): """Send emails representing certain events.""" def __init__(self, ui_host, ui_port): self._ui_host = ui_host self._ui_port = ui_port def _send_message(self, subject, to, text, html): """Send a message through local SMTP server. Args: subject: The subject of the email message. to: The list of recipient email addresses. text: The email body in text format.
from pinball.ui.data import WorkflowScheduleData from pinball.ui.data import WorkflowData from pinball.ui.data import WorkflowInstanceData from pinball.workflow import log_saver from pinball.workflow.name import Name from pinball.workflow.signaller import Signal __author__ = 'Pawel Garbacki, Mao Ye' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = ['Pawel Garbacki', 'Mao Ye'] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.ui.data_builder') class DataBuilder(object): # TODO(pawel): change use_cache default to True after we gain enough # confidence that it is bug free. def __init__(self, store, use_cache=False): self._store = store self.use_cache = use_cache @staticmethod def _parse_job_token_name(token_name): name = Name.from_job_token_name(token_name) if name.workflow: return name return None
import sys import threading from pinball.config.utils import get_log from pinball.master.blessed_version import BlessedVersion from pinball.master.transaction import REQUEST_TO_TRANSACTION __author__ = 'Pawel Garbacki' __copyright__ = 'Copyright 2015, Pinterest, Inc.' __credits__ = [__author__] __license__ = 'Apache' __version__ = '2.0' LOG = get_log('pinball.master.master_handler') class MasterHandler(object): """Handler implementing the token master logic. Tokens are stored in a trie where keys are token names while the values are the tokens themselves. Trie structure provides an efficient access to operations on token name prefixes such as token querying and counting. A special type of singleton token - called the blessed version - is stored in the tree with other tokens. The blessed version is used to generate unique version numbers. """ _BLESSED_VERSION = '/__BLESSED_VERSION__' _MASTER_OWNER = '__master__'