Example #1
    def __init__(self):
        """Initialize the singleton instance."""
        self._id = Flags.ARGS.id
        self._pipeline_date = Flags.ARGS.date

        # Set the pipeline base directory (src root).
        self._pipeline_base_dir = FileUtils.GetAbsPathForFile(Flags.ARGS.root)
        if not os.path.isdir(self._pipeline_base_dir):
            TermColor.Fatal('Invalid Root directory: %s' % Flags.ARGS.root)

        # Set the pipeline specific binary directory, if specified
        self._pipeline_bin_dir = ''
        if Flags.ARGS.bin_root:
            self._pipeline_bin_dir = FileUtils.GetAbsPathForFile(
                Flags.ARGS.bin_root)

        # Set the pipeline utilities directory
        self._pipeline_utils_dir = FileUtils.GetAbsPathForFile(
            Flags.ARGS.utils_root)

        # Create all necessary directories.
        self._pipeline_output_dir = ''
        self._pipeline_log_dir = ''
        self._pipeline_publish_dir = Flags.ARGS.publish_root
        self._subdirs = {}
        self.__CreateInitialSubDirs()
        self.PrintConfig()
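
Elsewhere in these examples (see Example #2) this configuration is read back through a singleton accessor rather than constructed directly. A minimal read-side sketch follows; only PipelineConfig.Instance() and pipeline_base_dir() appear in the examples, and the second getter name is an assumption:

# Read-side sketch of the singleton configuration. Only Instance() and
# pipeline_base_dir() are confirmed by Example #2; pipeline_bin_dir() is an
# assumed accessor name for the _pipeline_bin_dir attribute set above.
config = PipelineConfig.Instance()
base_dir = config.pipeline_base_dir()  # absolute src root validated in __init__
bin_dir = config.pipeline_bin_dir()    # optional pipeline-specific binary dir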
Example #2
    def _ComputeTasks(cls, targets, ignore_list=None):
        """Computes the tasks to be evaluated given the input targets.
    Args:
      targets: list: List of input targets.
      ignore_list: list: List of strings to ignore.

    Return:
      dict{int, set(string)}: Dict from priority to the set of tasks to execute at that priority.
    """
        # First create a simple task list of priority string to task.
        # Once all the tasks have been collected, sort them to create the actual priority order.
        tasks = {}
        # Work on copies so the caller's lists are not mutated; the loop below
        # appends expanded directory contents to `targets` while iterating.
        targets = list(targets)
        ignore_list = list(ignore_list or []) + ['timeout']
        for target in targets:
            ignore = FileUtils.IgnorePath(target, ignore_list)
            if ignore:
                TermColor.Warning(
                    'Ignored target %s as anything with [%s] is ignored.' %
                    (target, ignore))
                continue

            recurse = False
            if os.path.basename(target) == '...':
                target = os.path.dirname(target)
                if not target:
                    target = FileUtils.GetAbsPathForFile(os.getcwd())
                    base_dir = PipelineConfig.Instance().pipeline_base_dir()
                    if not target.startswith(base_dir):
                        target = base_dir
                recurse = True

            abs_target = FileUtils.GetAbsPathForFile(target)
            if not abs_target:
                TermColor.Warning('[%s] is not a valid path' % (target))
                continue

            if os.path.isfile(abs_target):
                cls.__AddFileToTasks(tasks, abs_target)
            elif os.path.isdir(abs_target):
                targets += FileUtils.GetFilesInDir(abs_target, recurse,
                                                   ignore_list)
            else:
                TermColor.Warning('[%s] is not supported' % (abs_target))
                continue

        return cls.__MergeTasks(tasks)
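
As a rough illustration of the target convention above, where a trailing '...' means recurse into that directory, here is a hedged invocation sketch. The enclosing class name PipelineUtils, the target paths, and the assumption that _ComputeTasks is exposed as a classmethod are all illustrative:

# Hypothetical call; the class name and target paths are made up for illustration.
# 'pipelines/daily/...' is expanded recursively, while the plain file path is
# added directly via __AddFileToTasks.
tasks_by_priority = PipelineUtils._ComputeTasks(
    ['pipelines/daily/...', 'pipelines/adhoc/export.py'],
    ignore_list=['deprecated'])
for priority in sorted(tasks_by_priority):
    print(priority, sorted(tasks_by_priority[priority]))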
Example #3
def _create_app_internal(db, flask_config, instance_configuration,
                         zen_configuration_module):
    # Create and configure the main Flask app. Perform any initializations that
    # should happen before the site goes online.

    # TODO(stephen): Is this the right place for this log to happen?
    is_production = flask_config.IS_PRODUCTION
    if is_production:
        LOG.info('Zenysis is running in PRODUCTION mode')

    app = Flask(__name__, static_folder='../public', static_url_path='')

    # Misc app setup and settings.
    app.secret_key = flask_config.SECRET_KEY
    app.debug = not is_production
    app.config.from_object(flask_config)

    # Register the app with our db reference
    db.init_app(app)

    # Handle migrations before anyone uses the DB
    migrations_directory = FileUtils.GetAbsPathForFile('web/server/migrations')
    Migrate(app, db, migrations_directory)

    # Only initialize the application if we are on the main processing thread.
    # In debug mode when the app is started directly (not via gunicorn), the
    # werkzeug reloader spawns a child process that gets restarted after a file
    # change. The parent process is then not used.
    if os.environ.get('SERVER_SOFTWARE', '').startswith('gunicorn') or (
            app.debug and is_running_from_reloader()):

        # NOTE(vedant): Not sure if this is the best way to accomplish this but it will at least
        # prevent errors from being thrown during server start.
        # NOTE(yitian): Initializing database seed values before app setup
        # so that if new database values are added, app setup won't error.
        initialize_database_seed_values(flask_config.SQLALCHEMY_DATABASE_URI)

        with app.app_context():
            _fail_if_schema_upgrade_needed()
            _initialize_zenysis_module(app, zen_configuration_module)
            _initialize_template_renderer(app)
            _initialize_email_renderer(app)
            _initialize_druid_context(app)
            _initialize_geo_explorer(app)
            _initialize_aqt_data(app)
            _initialize_notification_service(app, instance_configuration)
            _initialize_simple_cache(app)
            _initialize_app(app, db, is_production)

    # NOTE(stephen): The last thing we need to do when bootstrapping our app is
    # dispose of the DB connection used to initialize the app. This connection
    # *cannot* be shared across threads and disposing it prevents that from
    # happening. After disposal, each thread will have a new connection
    # established when they first access the DB.
    with app.app_context():
        db.engine.dispose()
    return app
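
The initialization guard above runs the one-time setup only under gunicorn or inside the werkzeug reloader's child process. Below is a small self-contained sketch of that check; werkzeug's is_running_from_reloader() is a real helper, while the wrapper function is made up for illustration:

import os

from werkzeug.serving import is_running_from_reloader


def should_initialize(debug):
    # Illustrative helper, not taken from the codebase above. The reloader
    # helper returns True only in the child process that werkzeug spawns, so
    # one-time initialization runs exactly once in debug mode.
    in_gunicorn = os.environ.get('SERVER_SOFTWARE', '').startswith('gunicorn')
    return in_gunicorn or (debug and is_running_from_reloader())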
Example #4
    def TaskNormalizedName(cls, task):
        """Returns the normalized name for the task.
    Args:
      task: string: The task to normalize.

    Return:
      string: The normalized name of the task.
    """
        abs_path = FileUtils.GetAbsPathForFile(task)
        if abs_path: return abs_path
        return task
Example #5
    def GetRulesFileForRule(cls, rule):
        """Returns the RULES file for the rule.
    Args:
      rule: string: The rule for which the file is needed.

    Return:
      string: The rules file if it exists.
    """
        if not rule: return None

        return FileUtils.GetAbsPathForFile(
            os.path.join(os.path.dirname(rule), 'RULES'))
Example #6
def read_js_version(javascript_version_file=None):
    '''Read the JS version stamp attached when building the production JS
    bundles to improve frontend error reporting. If no version file was added,
    we do not support versioning for this deployment.
    '''
    javascript_version_file = javascript_version_file or FileUtils.GetAbsPathForFile(
        'web/public/build/version.txt')
    # Only production has JS versions. If we are in production but the version
    # file does not exist, FileUtils will not resolve the absolute path and
    # will return None. Some deployments do not support versioning.
    if not IS_PRODUCTION or not javascript_version_file:
        return ''

    return FileUtils.FileContents(javascript_version_file).strip()
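
A short usage sketch follows; the explicit path in the second call is made up, and, per the comments above, the function returns an empty string outside production or when the version file cannot be resolved:

# Default lookup relative to the src root.
js_version = read_js_version()

# Hypothetical override, e.g. in a test; the path is purely illustrative.
js_version = read_js_version('/tmp/example_version.txt')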
Example #7
    def RuleNormalizedName(cls, rule):
        """Returns the normalized name for the rule.
    Args:
      rule: string: The rule to normalize.

    Return:
      string: The normalized name of the rule.
    """
        if rule.find(FileUtils.GetSrcRoot()) == 0:
            return os.path.normpath(rule)

        rules_file = cls.GetRulesFileForRule(rule)
        if rules_file:
            return os.path.join(os.path.dirname(rules_file),
                                os.path.basename(rule))

        # This does not have a rules file. Generally this happens for src files.
        abs_path = FileUtils.GetAbsPathForFile(rule)
        if abs_path: return abs_path

        return rule
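
Taken together with Examples #4 and #5, normalization falls back in three steps: a path already under the src root, a rule that sits next to a RULES file, and finally a plain source file resolved by GetAbsPathForFile. A hedged sketch follows; the enclosing class name Rules and the rule paths are invented for illustration:

# Hypothetical class name and paths, for illustration only.
print(Rules.GetRulesFileForRule('prod/pipeline/export'))  # .../prod/pipeline/RULES, if it exists
print(Rules.RuleNormalizedName('prod/pipeline/export'))   # joined with the RULES file's directory
print(Rules.TaskNormalizedName('prod/pipeline/run.py'))   # absolute path when resolvable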
Example #8
from builtins import object
import json
import os

from pylib.file.file_utils import FileUtils

from db.druid.errors import BadIndexingPathException
from db.druid.indexing.util import build_input_spec
from db.druid.util import DRUID_DATE_FORMAT
from util.file.directory_util import compute_file_hash
from util.aws.status import RUNNING_IN_EC2

# Default files for indexing, relative to the zenysis src root
DEFAULT_METRICS_SPEC_FILE = FileUtils.GetAbsPathForFile(
    'db/druid/indexing/resources/metrics_spec.json')
DEFAULT_TASK_TEMPLATE_FILE = FileUtils.GetAbsPathForFile(
    'db/druid/indexing/resources/index_task.json.tmpl')

# If running in amazon, use the m4 instance tuning config. Otherwise, use
# the less resource intensive on-prem config.
# TODO(stephen): THIS IS UGLY
DEFAULT_TUNING_CONFIG_FILE = FileUtils.GetAbsPathForFile(
    'db/druid/indexing/resources/tuning_configs/single_m4.4xlarge.json'
    if RUNNING_IN_EC2 else
    'db/druid/indexing/resources/tuning_configs/on_prem.json')


def _validate_file_path(path):
    error_str = ''
    if not path:
        error_str = 'Path cannot be empty.'