def create_app():
    app = Flask(__name__)
    app.config.from_object(CONFIG_CLASS)

    init_logging()

    socketio = SocketIO(app, cors_allowed_origins="*")

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    # Read the directory-mount based config into the Flask config.
    try:
        conf_data = get_user_conf()
        app.config.update(conf_data)
    except Exception:
        app.logger.warning("Failed to load config.json")

    app.config["ORCHEST_REPO_TAG"] = get_repo_tag()

    # Create a thread for non-CPU bound background tasks, e.g. requests.
    scheduler = BackgroundScheduler(
        job_defaults={
            # Infinite amount of grace time, so that if a task cannot
            # be instantly executed (e.g. if the webserver is busy)
            # then it will eventually be.
            "misfire_grace_time": 2**31,
            "coalesce": False,
            # So that the same job can be in the queue an infinite
            # amount of times, e.g. for concurrent requests issuing
            # the same tasks.
            "max_instances": 2**31,
        }
    )
    app.config["SCHEDULER"] = scheduler
    scheduler.start()

    app.logger.info("Flask CONFIG: %s" % app.config)

    # Create the database if it does not exist yet. Roughly equal to a
    # "CREATE DATABASE IF NOT EXISTS <db_name>" call.
    if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
        create_database(app.config["SQLALCHEMY_DATABASE_URI"])
    db.init_app(app)
    ma.init_app(app)

    # Necessary for the migration.
    Migrate().init_app(app, db)

    with app.app_context():
        # Alembic does not support calling upgrade() concurrently.
        if not is_werkzeug_parent():
            # Upgrade to the latest revision. This also takes care of
            # bringing an "empty" db (no tables) on par.
            try:
                upgrade()
            except Exception as e:
                logging.error("Failed to run upgrade() %s [%s]" % (e, type(e)))

            # On startup all kernels are refreshed. This is because
            # updating Orchest might make the kernels in the
            # userdir/.orchest/kernels directory invalid.
            projs = Project.query.all()
            for proj in projs:
                try:
                    populate_kernels(app, db, proj.uuid)
                except Exception as e:
                    logging.error(
                        "Failed to populate kernels on startup for"
                        " project %s: %s [%s]" % (proj.uuid, e, type(e))
                    )

        # To avoid multiple removals in case of a flask --reload, so
        # that this code runs once per container.
        try:
            os.mkdir("/tmp/jupyter_lock_removed")
            lock_path = os.path.join(
                "/userdir", _config.JUPYTER_USER_CONFIG, "lab", ".bootlock"
            )
            if os.path.exists(lock_path):
                app.logger.info("Removing dangling jupyter boot lock.")
                os.rmdir(lock_path)
        except FileExistsError:
            app.logger.info(
                "/tmp/jupyter_lock_removed exists. Not removing the lock again."
            )

    # Telemetry.
    if not app.config["TELEMETRY_DISABLED"]:
        # Initialize posthog.
        posthog.api_key = base64.b64decode(app.config["POSTHOG_API_KEY"]).decode()
        posthog.host = app.config["POSTHOG_HOST"]

        # Send a ping now.
        analytics_ping(app)

        # And every 15 minutes.
        scheduler.add_job(
            analytics_ping,
            "interval",
            minutes=app.config["TELEMETRY_INTERVAL"],
            args=[app],
        )

    # Static file serving.
    @app.route("/", defaults={"path": ""}, methods=["GET"])
    @app.route("/<path:path>", methods=["GET"])
    def index(path):
        # In development mode, proxy to CLIENT_DEV_SERVER_URL.
        if os.environ.get("FLASK_ENV") == "development":
            return _proxy(request, app.config["CLIENT_DEV_SERVER_URL"] + "/")
        else:
            file_path = os.path.join(app.config["STATIC_DIR"], path)
            if os.path.isfile(file_path):
                return send_from_directory(app.config["STATIC_DIR"], path)
            else:
                return send_from_directory(app.config["STATIC_DIR"], "index.html")

    register_views(app, db)
    register_orchest_api_views(app, db)
    register_background_tasks_view(app, db)
    register_socketio_broadcast(socketio)
    register_analytics_views(app, db)

    processes = []
    if not is_werkzeug_parent():
        file_dir = os.path.dirname(os.path.realpath(__file__))

        # Start the log_streamer process.
        log_streamer_process = Popen(
            ["python3", "-m", "scripts.log_streamer"],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )
        app.logger.info("Started log_streamer.py")
        processes.append(log_streamer_process)

    return app, socketio, processes
def create_app():
    app = Flask(__name__)
    app.config.from_object(CONFIG_CLASS)

    init_logging()

    socketio = SocketIO(app, cors_allowed_origins="*")

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    # Read the directory-mount based config into the Flask config.
    try:
        conf_data = get_user_conf()
        app.config.update(conf_data)
    except Exception:
        app.logger.warning("Failed to load config.json")

    app.config["ORCHEST_REPO_TAG"] = get_repo_tag()

    # Create a thread for non-CPU bound background tasks, e.g. requests.
    scheduler = BackgroundScheduler(
        job_defaults={
            # Infinite amount of grace time, so that if a task cannot
            # be instantly executed (e.g. if the webserver is busy)
            # then it will eventually be.
            "misfire_grace_time": 2**31,
            "coalesce": False,
            # So that the same job can be in the queue an infinite
            # amount of times, e.g. for concurrent requests issuing
            # the same tasks.
            "max_instances": 2**31,
        }
    )
    app.config["SCHEDULER"] = scheduler
    scheduler.start()

    app.logger.info("Flask CONFIG: %s" % app.config)

    # Create the database if it does not exist yet. Roughly equal to a
    # "CREATE DATABASE IF NOT EXISTS <db_name>" call.
    if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
        create_database(app.config["SQLALCHEMY_DATABASE_URI"])
    db.init_app(app)
    ma.init_app(app)

    # Necessary for the migration.
    Migrate().init_app(app, db)

    with app.app_context():
        # Alembic does not support calling upgrade() concurrently.
        if not is_werkzeug_parent():
            # Upgrade to the latest revision. This also takes care of
            # bringing an "empty" db (no tables) on par.
            try:
                upgrade()
            except Exception as e:
                logging.error("Failed to run upgrade() %s [%s]" % (e, type(e)))

            initialize_default_datasources(db, app)

    # Telemetry.
    if not app.config["TELEMETRY_DISABLED"]:
        # Initialize posthog.
        posthog.api_key = base64.b64decode(app.config["POSTHOG_API_KEY"]).decode()
        posthog.host = app.config["POSTHOG_HOST"]

        # Send a ping now.
        analytics_ping(app)

        # And every 15 minutes.
        scheduler.add_job(
            analytics_ping,
            "interval",
            minutes=app.config["TELEMETRY_INTERVAL"],
            args=[app],
        )

    # Static file serving.
    @app.route("/public/<path:path>")
    def send_files(path):
        return send_from_directory("../static", path)

    register_views(app, db)
    register_orchest_api_views(app, db)
    register_background_tasks_view(app, db)
    register_socketio_broadcast(db, socketio)
    register_analytics_views(app, db)

    processes = []
    if not is_werkzeug_parent():
        file_dir = os.path.dirname(os.path.realpath(__file__))

        # Start the log_streamer process.
        log_streamer_process = Popen(
            ["python3", "-m", "scripts.log_streamer"],
            cwd=os.path.join(file_dir, ".."),
            stderr=subprocess.STDOUT,
        )
        app.logger.info("Started log_streamer.py")
        processes.append(log_streamer_process)

    return app, socketio, processes
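# Both webserver variants gate upgrade() and the subprocess spawning
# behind is_werkzeug_parent(), because the werkzeug reloader runs the
# module twice: once in the file-watching parent process and once in
# the serving child. A sketch of what such a helper typically looks
# like; this is an assumption, not the repo's actual implementation.

import os


def is_werkzeug_parent():
    # When Flask runs with the reloader, the serving child process has
    # WERKZEUG_RUN_MAIN set to "true"; the watching parent does not.
    return (
        os.environ.get("FLASK_ENV") == "development"
        and os.environ.get("WERKZEUG_RUN_MAIN") != "true"
    )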
def create_app(config_class=None, use_db=True, be_scheduler=False):
    """Create the Flask app and return it.

    Args:
        config_class: Configuration class. See orchest-api/app/config.
        use_db: If true, associate a database to the Flask app
            instance, which implies connecting to a given database and
            possibly creating such a database and/or tables if they do
            not exist already. The reason to differentiate instancing
            the app through this argument is that the celery worker
            does not need to connect to the db that "belongs" to the
            orchest-api.
        be_scheduler: If true, a background thread will act as a job
            scheduler, according to the logic in core/scheduler. While
            Orchest runs, only a single process should be acting as
            scheduler.

    Returns:
        Flask.app
    """
    app = Flask(__name__)
    app.config.from_object(config_class)

    init_logging()

    # Cross-origin resource sharing. Allow the API to be requested
    # from the different microservices such as the webserver.
    CORS(app, resources={r"/*": {"origins": "*"}})

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    if use_db:
        # Create the database if it does not exist yet. Roughly equal
        # to a "CREATE DATABASE IF NOT EXISTS <db_name>" call.
        if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
            create_database(app.config["SQLALCHEMY_DATABASE_URI"])
        db.init_app(app)

        # Necessary for the migration.
        Migrate().init_app(app, db)

        with app.app_context():
            # Alembic does not support calling upgrade() concurrently.
            if not is_werkzeug_parent():
                # Upgrade to the latest revision. This also takes care
                # of bringing an "empty" db (no tables) on par.
                try:
                    upgrade()
                except Exception as e:
                    logging.error("Failed to run upgrade() %s [%s]" % (e, type(e)))

            # In case of an ungraceful shutdown, these entities could
            # be in an invalid state, so they are deleted, since for
            # sure they are not running anymore.
            # To avoid the issue of entities being deleted because of
            # a flask app reload triggered by a --dev code change, we
            # attempt to create a directory first. Since this is an
            # atomic operation that will result in an error if the
            # directory is already there, this cleanup operation will
            # run only once per container.
            try:
                os.mkdir("/tmp/cleanup_done")
                InteractiveSession.query.delete()

                # Delete old JupyterBuilds on start to avoid
                # accumulation in the DB. Leave the latest such that
                # the user can see details about the last executed
                # build after restarting Orchest.
                jupyter_builds = (
                    JupyterBuild.query.order_by(JupyterBuild.requested_time.desc())
                    .offset(1)
                    .all()
                )
                # Can't use offset and .delete in conjunction in
                # sqlalchemy unfortunately.
                for jupyter_build in jupyter_builds:
                    db.session.delete(jupyter_build)
                db.session.commit()

                # Fix interactive runs.
                runs = InteractivePipelineRun.query.filter(
                    InteractivePipelineRun.status.in_(["PENDING", "STARTED"])
                ).all()
                with TwoPhaseExecutor(db.session) as tpe:
                    for run in runs:
                        AbortPipelineRun(tpe).transaction(run.uuid)

                # Fix one-off jobs (and their pipeline runs).
                jobs = Job.query.filter_by(schedule=None, status="STARTED").all()
                with TwoPhaseExecutor(db.session) as tpe:
                    for job in jobs:
                        AbortJob(tpe).transaction(job.uuid)

                # This is to fix the state of cron job pipeline runs.
                runs = NonInteractivePipelineRun.query.filter(
                    NonInteractivePipelineRun.status.in_(["STARTED"])
                ).all()
                with TwoPhaseExecutor(db.session) as tpe:
                    for run in runs:
                        AbortPipelineRun(tpe).transaction(run.uuid)

                # Fix env builds.
                builds = EnvironmentBuild.query.filter(
                    EnvironmentBuild.status.in_(["PENDING", "STARTED"])
                ).all()
                with TwoPhaseExecutor(db.session) as tpe:
                    for build in builds:
                        AbortEnvironmentBuild(tpe).transaction(build.uuid)

                # Fix jupyter builds.
                builds = JupyterBuild.query.filter(
                    JupyterBuild.status.in_(["PENDING", "STARTED"])
                ).all()
                with TwoPhaseExecutor(db.session) as tpe:
                    for build in builds:
                        AbortJupyterBuild(tpe).transaction(build.uuid)

                # Trigger a build of JupyterLab if no JupyterLab image
                # is found for this version and the JupyterLab
                # setup_script is non-empty.
                trigger_conditional_jupyter_build(app)

            except FileExistsError:
                app.logger.info("/tmp/cleanup_done exists. Skipping cleanup.")
            except Exception as e:
                app.logger.error("Cleanup failed")
                app.logger.error(e)

    if be_scheduler and not is_werkzeug_parent():
        # Create a scheduler and have the scheduling logic run
        # periodically.
        scheduler = BackgroundScheduler(
            job_defaults={
                # Infinite amount of grace time, so that if a task
                # cannot be instantly executed (e.g. if the webserver
                # is busy) then it will eventually be.
                "misfire_grace_time": 2**31,
                "coalesce": False,
                # So that the same job can be in the queue an infinite
                # amount of times, e.g. for concurrent requests
                # issuing the same tasks.
                "max_instances": 2**31,
            }
        )
        app.config["SCHEDULER"] = scheduler
        scheduler.start()
        scheduler.add_job(
            Scheduler.check_for_jobs_to_be_scheduled,
            "interval",
            seconds=app.config["SCHEDULER_INTERVAL"],
            args=[app],
        )

    # Register blueprints.
    app.register_blueprint(api, url_prefix="/api")

    return app
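# The "/tmp/cleanup_done" trick above is a once-per-container latch:
# os.mkdir() either creates the directory or raises FileExistsError
# atomically, so exactly one process, and only on the first boot of
# the container, performs the cleanup, with no locking library needed.
# The same pattern extracted into a standalone helper; the name
# run_once_per_container is hypothetical, not from the repo.

import os


def run_once_per_container(marker_dir, fn):
    """Run fn() only if marker_dir could be created atomically."""
    try:
        os.mkdir(marker_dir)
    except FileExistsError:
        # Another process (or an earlier reload in this container)
        # already ran fn().
        return False
    fn()
    return True


# Example: run_once_per_container("/tmp/cleanup_done", do_cleanup)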
def create_app(config_class=None, use_db=True, be_scheduler=False):
    """Create the Flask app and return it.

    Args:
        config_class: Configuration class. See orchest-api/app/config.
        use_db: If true, associate a database to the Flask app
            instance, which implies connecting to a given database and
            possibly creating such a database and/or tables if they do
            not exist already. The reason to differentiate instancing
            the app through this argument is that the celery worker
            does not need to connect to the db that "belongs" to the
            orchest-api.
        be_scheduler: If true, a background thread will act as a job
            scheduler, according to the logic in core/scheduler. While
            Orchest runs, only a single process should be acting as
            scheduler.

    Returns:
        Flask.app
    """
    app = Flask(__name__)
    app.config.from_object(config_class)

    init_logging()

    # Cross-origin resource sharing. Allow the API to be requested
    # from the different microservices such as the webserver.
    CORS(app, resources={r"/*": {"origins": "*"}})

    if os.getenv("FLASK_ENV") == "development":
        app = register_teardown_request(app)

    if use_db:
        # Create the database if it does not exist yet. Roughly equal
        # to a "CREATE DATABASE IF NOT EXISTS <db_name>" call.
        if not database_exists(app.config["SQLALCHEMY_DATABASE_URI"]):
            create_database(app.config["SQLALCHEMY_DATABASE_URI"])
        db.init_app(app)

        # Necessary for the migration.
        Migrate().init_app(app, db)

        with app.app_context():
            # Alembic does not support calling upgrade() concurrently.
            if not is_werkzeug_parent():
                # Upgrade to the latest revision. This also takes care
                # of bringing an "empty" db (no tables) on par.
                try:
                    upgrade()
                except Exception as e:
                    logging.error("Failed to run upgrade() %s [%s]" % (e, type(e)))

            # In case of an ungraceful shutdown, these entities could
            # be in an invalid state, so they are deleted, since for
            # sure they are not running anymore.
            # To avoid the issue of entities being deleted because of
            # a flask app reload triggered by a dev mode code change,
            # we attempt to create a directory first. Since this is an
            # atomic operation that will result in an error if the
            # directory is already there, this cleanup operation will
            # run only once per container.
            try:
                os.mkdir("/tmp/interactive_cleanup_done")
                InteractiveSession.query.delete()
                InteractivePipelineRun.query.filter(
                    InteractivePipelineRun.status.in_(["PENDING", "STARTED"])
                ).delete(synchronize_session="fetch")
                db.session.commit()
            except FileExistsError:
                pass

    if be_scheduler:
        # Create a scheduler and have the scheduling logic run
        # periodically.
        scheduler = BackgroundScheduler(
            job_defaults={
                # Infinite amount of grace time, so that if a task
                # cannot be instantly executed (e.g. if the webserver
                # is busy) then it will eventually be.
                "misfire_grace_time": 2**31,
                "coalesce": False,
                # So that the same job can be in the queue an infinite
                # amount of times, e.g. for concurrent requests
                # issuing the same tasks.
                "max_instances": 2**31,
            }
        )
        app.config["SCHEDULER"] = scheduler
        scheduler.start()
        scheduler.add_job(
            Scheduler.check_for_jobs_to_be_scheduled,
            "interval",
            seconds=app.config["SCHEDULER_INTERVAL"],
            args=[app],
        )

    # Register blueprints.
    app.register_blueprint(api, url_prefix="/api")

    return app
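# The job_defaults passed to BackgroundScheduler in both variants
# deserve a note: misfire_grace_time=2**31 effectively disables
# APScheduler's misfire handling (a job that could not fire on time
# still runs later), coalesce=False keeps every missed run instead of
# collapsing them into one, and max_instances=2**31 allows unbounded
# concurrent instances of the same job. A self-contained sketch with
# an illustrative job and interval (both are assumptions):

import time

from apscheduler.schedulers.background import BackgroundScheduler


def tick():
    print("tick", time.strftime("%H:%M:%S"))


scheduler = BackgroundScheduler(
    job_defaults={
        "misfire_grace_time": 2**31,  # never drop a late job
        "coalesce": False,  # run every missed execution, not just one
        "max_instances": 2**31,  # allow overlapping runs of the same job
    }
)
scheduler.start()
scheduler.add_job(tick, "interval", seconds=5)

time.sleep(20)
scheduler.shutdown()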