def build(self):
    """Run one supervisor build iteration.

    Executes the fixed pipeline of state transitions (stop tasks, start
    dags, parent tasks, load/process tasks, computers) and persists the
    auxiliary snapshot.  Order of the self-calls matters.
    """
    try:
        # if self.fast_check():
        #     return

        # timestamp for this iteration's auxiliary snapshot
        self.auxiliary = {'time': now()}
        self.create_base()
        self.process_stop_tasks()
        self.process_start_dags()
        self.process_parent_tasks()
        self.load_tasks()
        self.load_computers()
        self.process_tasks()
        self.write_auxiliary()
    except ObjectDeletedError:
        # a row was deleted concurrently mid-iteration; safe to skip
        pass
    except Exception as e:
        # On a broken DB connection rebuild the session and the logger
        # first, so that the error can actually be written to the DB.
        if Session.sqlalchemy_error(e):
            Session.cleanup(key='SupervisorBuilder')
            self.session = Session.create_session(key='SupervisorBuilder')
            self.logger = create_logger(self.session, 'SupervisorBuilder')
        self.logger.error(traceback.format_exc(), ComponentType.Supervisor)
def build(self):
    """Execute the task: check/change status, download artifacts, build
    the executor and run it.

    On failure the task is marked Failed and the exception re-raised;
    the ``finally`` block always finishes the celery task and, when
    ``self.exit`` is set, hard-exits the worker process.
    """
    try:
        self.create_base()
        self.check_status()
        self.change_status()
        self.download()
        self.create_executor()
        self.execute()
    except Exception as e:
        # On a broken DB connection rebuild the session and the logger
        # first, so the failure can be recorded.
        if Session.sqlalchemy_error(e):
            Session.cleanup(key='ExecuteBuilder')
            self.session = Session.create_session(key='ExecuteBuilder')
            # BUG FIX: was ``self.logger.session = create_logger(...)``,
            # which attached the fresh logger to an attribute of the dead
            # logger instead of replacing ``self.logger`` (compare the
            # identical recovery in SupervisorBuilder.build).
            self.logger = create_logger(self.session, 'ExecuteBuilder')
        step = self.executor.step.id if \
            (self.executor and self.executor.step) else None
        self.error(traceback.format_exc(), step)
        self.provider.change_status(self.task, TaskStatus.Failed)
        # bare raise preserves the original traceback (was ``raise e``)
        raise
    finally:
        if app.current_task:
            app.current_task.update_state(state=states.SUCCESS)
        # close unconditionally so the celery app connection is released
        # even when running outside a task context
        app.close()
        if self.exit:
            # noinspection PyProtectedMember
            os._exit(0)
def decorated(*args, **kwargs):
    """Call the wrapped function, rebuilding the module-level session on
    SQLAlchemy connection errors, and always re-raise the exception.

    Returns whatever the wrapped function returns (the original wrapper
    silently dropped the return value).
    """
    global _session
    try:
        # BUG FIX: propagate f's return value instead of discarding it
        return f(*args, **kwargs)
    except Exception as e:
        if Session.sqlalchemy_error(e):
            Session.cleanup(key=__name__)
            _session = Session.create_session(key=__name__)
        # bare raise preserves the original traceback (was ``raise e``)
        raise
def sync(self):
    """One file-sync cycle for this host.

    Mirrors data/model folders from every online computer that has
    unsynced tasks for this machine, then records the sync time.
    """
    hostname = socket.gethostname()
    try:
        provider = ComputerProvider(self.session)
        task_synced_provider = TaskSyncedProvider(self.session)

        computer = provider.by_name(hostname)
        sync_start = now()

        if FILE_SYNC_INTERVAL == 0:
            # syncing disabled; just idle briefly
            time.sleep(1)
        else:
            # only consider computers seen within the last 10 seconds
            # (sic: provider method name is misspelled "activtiy")
            computers = provider.all_with_last_activtiy()
            computers = [
                c for c in computers
                if (now() - c.last_activity).total_seconds() < 10
            ]
            computers_names = {c.name for c in computers}

            for c, project, tasks in task_synced_provider.for_computer(
                    computer.name):
                if c.name not in computers_names:
                    self.logger.info(
                        f'Computer = {c.name} '
                        f'is offline. Can not sync',
                        ComponentType.WorkerSupervisor, hostname)
                    continue

                # skip a source that is itself mid-sync
                if c.syncing_computer:
                    continue

                # project-configured ignore list applies to data/,
                # models/ is always synced in full
                excluded = list(map(str, yaml_load(project.ignore_folders)))
                folders_excluded = [[join('data', project.name), excluded],
                                    [join('models', project.name), []]]

                computer.syncing_computer = c.name
                provider.update()

                sync_directed(self.session, c, computer, folders_excluded)

                # mark each task as synced to this computer
                for t in tasks:
                    task_synced_provider.add(
                        TaskSynced(computer=computer.name, task=t.id))

            time.sleep(FILE_SYNC_INTERVAL)

        computer.last_synced = sync_start
        computer.syncing_computer = None
        provider.update()
    except Exception as e:
        # rebuild session/logger on connection failure before logging
        if Session.sqlalchemy_error(e):
            Session.cleanup('FileSync')
            self.session = Session.create_session(key='FileSync')
            self.logger = create_logger(self.session, 'FileSync')
        self.logger.error(traceback.format_exc(),
                          ComponentType.WorkerSupervisor, hostname)
def process_error(self, e: Exception):
    """Log *e* with the full traceback.

    If the exception came from a broken SQLAlchemy connection, rebuild
    the session and logger first so the log write itself can succeed.
    """
    connection_broken = Session.sqlalchemy_error(e)
    if connection_broken:
        Session.cleanup('FileSync')
        self.session = Session.create_session(key='FileSync')
        self.logger = create_logger(self.session, 'FileSync')

    self.logger.error(
        traceback.format_exc(),
        ComponentType.WorkerSupervisor,
        socket.gethostname()
    )
def wrapper():
    """Invoke *f* with the shared session/logger pair; on failure,
    rebuild both after a SQLAlchemy connection error, then log the
    traceback (errors are never propagated to the scheduler)."""
    try:
        f(wrapper_vars['session'], wrapper_vars['logger'])
    except Exception as e:
        if Session.sqlalchemy_error(e):
            Session.cleanup(name)
            fresh = Session.create_session(key=name)
            wrapper_vars['session'] = fresh
            wrapper_vars['logger'] = create_logger(fresh, name)
        wrapper_vars['logger'].error(traceback.format_exc(),
                                     ComponentType.WorkerSupervisor,
                                     hostname)
def stop(logger, session: Session, task: Task, dag: Dag):
    """Stop a running task.

    Revokes the celery job (or marks the task Skipped if it never ran),
    kills the task's process tree on its assigned computer, and records
    the final status.  Returns the task's resulting status value.
    """
    provider = TaskProvider(session)
    # already finished (status past InProgress) - nothing to stop
    if task.status > TaskStatus.InProgress.value:
        return task.status

    status = TaskStatus.Stopped
    try:
        if task.status != TaskStatus.NotRan.value:
            app.control.revoke(task.celery_id, terminate=True)
        else:
            # never started: record as Skipped instead of Stopped
            status = TaskStatus.Skipped
    except Exception as e:
        if Session.sqlalchemy_error(e):
            # DB is broken: best-effort log, then propagate so the
            # caller can rebuild its session
            try:
                logger.error(traceback.format_exc(), ComponentType.API)
            except Exception:
                pass
            raise
        logger.error(traceback.format_exc(), ComponentType.API)
    finally:
        # always try to kill the OS processes, even if revoke failed
        if task.pid:
            queue = f'{task.computer_assigned}_' \
                    f'{dag.docker_img or "default"}_supervisor'
            kill.apply_async((task.pid, ), queue=queue, retry=False)

            additional_info = yaml_load(task.additional_info)
            for p in additional_info.get('child_processes', []):
                kill.apply_async((p, ), queue=queue, retry=False)
        provider.change_status(task, status)

    return task.status
def __init__(self, session: Session = None):
    """Store the DB session (creating a default one when omitted) and
    build a serializer configured with this class's date/time formats."""
    self._session = session if session is not None \
        else Session.create_session()
    self.serializer = Serializer(
        date_format=self.date_format,
        datetime_format=self.datetime_format,
        time_format=self.time_format
    )
def session():
    """Pytest fixture: wipe the root folder, reload mlcomp, run the DB
    migrations and yield a fresh session."""
    if ROOT_FOLDER:
        shutil.rmtree(ROOT_FOLDER)

    reload(mlcomp)
    migrate()

    yield Session.create_session()
def __init__(self):
    """Create a builder with its own DB session and logger.

    Provider and working-state attributes start as None/empty and are
    populated later by ``build``.
    """
    self.session = Session.create_session(key='SupervisorBuilder')
    self.logger = create_logger(self.session, 'SupervisorBuilder')

    # providers and per-iteration state, filled in lazily by build()
    for attr in ('provider', 'computer_provider', 'docker_provider',
                 'auxiliary_provider', 'dag_provider', 'queues',
                 'not_ran_tasks', 'dep_status', 'computers'):
        setattr(self, attr, None)
    self.auxiliary = {}
def find_imports(path: str,
                 files: List[str] = None,
                 exclude_patterns: List[str] = None,
                 encoding='utf-8'):
    """Collect the installed distributions imported by a python tree.

    Parses every ``.py`` file under *path* (or the explicit *files*
    list), skipping paths matched by gitignore-style *exclude_patterns*,
    and returns a list of ``(distribution_name, version)`` tuples for
    every top-level import whose distribution is installed.

    Raises whatever ``ast.parse`` raises on an unparsable file, after
    logging the offending file name.
    """
    res = []
    raw_imports = []
    files = files if files is not None \
        else glob(os.path.join(path, '**', '*.py'), recursive=True)
    exclude_patterns = exclude_patterns \
        if exclude_patterns is not None else []

    spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern,
                                        exclude_patterns)

    for file in files:
        if not file.endswith('.py'):
            continue
        file_rel = os.path.relpath(file, path)
        if spec.match_file(file_rel):
            continue

        with open(file, 'r', encoding=encoding) as f:
            content = f.read()
        try:
            tree = ast.parse(content)
            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for subnode in node.names:
                        raw_imports.append((subnode.name, file_rel))
                elif isinstance(node, ast.ImportFrom):
                    # BUG FIX: node.module is None for relative imports
                    # such as ``from . import x``; skip them instead of
                    # crashing on ``None.split('.')`` below.
                    if node.module is not None:
                        raw_imports.append((node.module, file_rel))
        except Exception as exc:
            logger = create_logger(Session.create_session(), __name__)
            logger.error('Failed on file: %s' % file_rel)
            raise exc

    for lib, file in raw_imports:
        name = lib.split('.')[0]
        try:
            if name in _mapping:
                name = _mapping[name]
            version = pkg_resources.get_distribution(name).version
            res.append((name, version))
        except Exception:
            # best-effort: unknown or uninstalled distribution, skip it
            pass
    return res
def __init__(self, id: int, repeat_count: int = 1, exit=True):
    """Create an execute-builder for task *id*.

    *repeat_count* is how many attempts are allowed; *exit* controls
    whether the worker process hard-exits when the build finishes.
    """
    self.session = Session.create_session(key='ExecuteBuilder')
    self.id = id
    self.repeat_count = repeat_count
    self.logger = create_logger(self.session, 'ExecuteBuilder')
    self.exit = exit

    # providers and execution state, populated during build()
    for attr in ('provider', 'library_provider', 'storage', 'task',
                 'dag', 'executor', 'hostname', 'docker_img',
                 'worker_index', 'queue_personal', 'config',
                 'executor_type'):
        setattr(self, attr, None)
def upgrade(migrate_engine):
    """Migration 002: load every report-layout YAML file from the
    ``002/report_layout`` folder into the ReportLayout table.

    All rows are committed in a single transaction; any failure rolls
    the whole batch back and re-raises.
    """
    folder = os.path.dirname(__file__)
    session = Session.create_session(connection_string=migrate_engine.url)
    provider = ReportLayoutProvider(session)
    try:
        files = os.path.join(folder, '002', 'report_layout', '*.yml')
        for path in glob(files):
            # layout name = file name without extension
            name = str(os.path.basename(path).split('.')[0])
            # BUG FIX: was ``open(path).read()``, which leaked the file
            # handle; the context manager guarantees it is closed.
            with open(path) as f:
                text = f.read()
            provider.add(ReportLayout(name=name, content=text,
                                      last_modified=now()),
                         commit=False)
        provider.commit()
    except Exception:
        provider.rollback()
        raise
def error_handler(f):
    """Decorator for scheduled jobs.

    Calls *f* with a dedicated session/logger pair.  Exceptions are
    never propagated: on a SQLAlchemy connection error the pair is
    rebuilt first, then the traceback is logged.
    """
    name = f.__name__
    session = Session.create_session(key=name)
    shared = {'session': session, 'logger': create_logger(session, name)}
    hostname = socket.gethostname()

    def wrapper():
        try:
            f(shared['session'], shared['logger'])
        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup(name)
                shared['session'] = Session.create_session(key=name)
                shared['logger'] = create_logger(shared['session'], name)
            shared['logger'].error(traceback.format_exc(),
                                   ComponentType.WorkerSupervisor,
                                   hostname)

    return wrapper
def decorated(*args, **kwargs):
    """Flask endpoint wrapper.

    Runs *f*, attaches ``success``/``error`` fields to its JSON payload
    and turns uncaught exceptions into a 500 response, rebuilding the
    shared read/write DB sessions on connection failures.  A ``Response``
    returned by *f* is passed through untouched.
    """
    global _read_session, _write_session, logger

    try:
        res = f(*args, **kwargs)
        success, status, error = True, 200, ''
    except Exception as e:
        if Session.sqlalchemy_error(e):
            Session.cleanup('server.read')
            Session.cleanup('server.write')
            _read_session = Session.create_session(key='server.read')
            _write_session = Session.create_session(key='server.write')
            logger = create_logger(_write_session, __name__)

        logger.error(
            f'Requested Url: {request.path}\n\n{traceback.format_exc()}',
            ComponentType.API
        )
        success, status, error = False, 500, traceback.format_exc()
        res = None

    if isinstance(res, Response):
        return res

    body = res or {}
    body['success'] = success
    body['error'] = error
    return Response(json.dumps(body), status=status)
class FileSync:
    """Background file-synchronisation daemon for one computer.

    Keeps data/model folders in sync with other online computers, both
    on demand (``sync_manual``) and periodically (``sync``).
    """
    # shared session/logger for the daemon; rebuilt in place inside
    # sync() when the DB connection breaks
    session = Session.create_session(key='FileSync')
    logger = create_logger(session, 'FileSync')

    def sync_manual(self, computer: Computer, provider: ComputerProvider):
        """
        button sync was clicked manually
        """
        if not computer.meta:
            return
        meta = yaml_load(computer.meta)
        if 'manual_sync' not in meta:
            return

        manual_sync = meta['manual_sync']

        project_provider = ProjectProvider(self.session)
        docker_provider = DockerProvider(self.session)

        dockers = docker_provider.get_online()
        project = project_provider.by_id(manual_sync['project'])

        for docker in dockers:
            # never sync a computer with itself
            if docker.computer == computer.name:
                continue

            source = provider.by_name(docker.computer)
            ignore_folders = [
                [join('models', project.name), []]
            ]
            sync_directed(self.session,
                          target=computer,
                          source=source,
                          ignore_folders=ignore_folders)

        # consume the manual-sync request so it runs only once
        del meta['manual_sync']
        computer.meta = yaml_dump(meta)
        provider.update()

    def sync(self):
        """One periodic sync cycle: serve manual requests, then pull
        from every online computer with unsynced tasks for this host."""
        hostname = socket.gethostname()
        try:
            provider = ComputerProvider(self.session)
            task_synced_provider = TaskSyncedProvider(self.session)

            computer = provider.by_name(hostname)
            sync_start = now()

            if FILE_SYNC_INTERVAL == 0:
                # periodic syncing disabled; just idle briefly
                time.sleep(1)
            else:
                self.sync_manual(computer, provider)

                # only computers seen within the last 10 seconds count
                # as online (sic: provider method name is misspelled)
                computers = provider.all_with_last_activtiy()
                computers = [
                    c for c in computers
                    if (now() - c.last_activity).total_seconds() < 10
                ]
                computers_names = {c.name for c in computers}

                for c, project, tasks in task_synced_provider.for_computer(
                        computer.name):
                    if c.sync_with_this_computer:
                        if c.name not in computers_names:
                            self.logger.info(f'Computer = {c.name} '
                                             f'is offline. Can not sync',
                                             ComponentType.WorkerSupervisor,
                                             hostname)
                            continue

                        # skip a source that is itself mid-sync
                        if c.syncing_computer:
                            continue

                        ignore_folders = [
                            [join('models', project.name), []]
                        ]

                        computer.syncing_computer = c.name
                        provider.update()

                        sync_directed(self.session, c, computer,
                                      ignore_folders)

                    # mark tasks as synced to this computer
                    for t in tasks:
                        task_synced_provider.add(
                            TaskSynced(computer=computer.name, task=t.id)
                        )

                time.sleep(FILE_SYNC_INTERVAL)

            computer.last_synced = sync_start
            computer.syncing_computer = None
            provider.update()
        except Exception as e:
            # rebuild session/logger on connection failure before logging
            if Session.sqlalchemy_error(e):
                Session.cleanup('FileSync')
                self.session = Session.create_session(key='FileSync')
                self.logger = create_logger(self.session, 'FileSync')
            self.logger.error(
                traceback.format_exc(),
                ComponentType.WorkerSupervisor,
                hostname
            )
DOCKER_IMG, DOCKER_MAIN, IP, PORT, WORKER_USAGE_INTERVAL, \ SYNC_WITH_THIS_COMPUTER, CAN_PROCESS_TASKS from mlcomp.db.core import Session from mlcomp.db.enums import ComponentType, TaskStatus from mlcomp.utils.logging import create_logger from mlcomp.db.providers import DockerProvider, TaskProvider from mlcomp.utils.schedule import start_schedule from mlcomp.utils.misc import dict_func, now, disk, get_username, \ kill_child_processes from mlcomp.worker.app import app from mlcomp.db.providers import ComputerProvider from mlcomp.db.models import ComputerUsage, Computer, Docker from mlcomp.utils.misc import memory from mlcomp.worker.sync import FileSync _session = Session.create_session(key='worker') @click.group() def main(): pass def error_handler(f): name = f.__name__ wrapper_vars = {'session': Session.create_session(key=name)} wrapper_vars['logger'] = create_logger(wrapper_vars['session'], name) hostname = socket.gethostname() def wrapper():
from mlcomp.db.providers import \ ComputerProvider, \ TaskProvider, \ StepProvider, \ ProjectProvider, DockerProvider from mlcomp.report import create_report, check_statuses from mlcomp.utils.config import merge_dicts_smart, dict_from_list_str from mlcomp.utils.logging import create_logger from mlcomp.worker.executors.kaggle import Submit from mlcomp.worker.sync import sync_directed, correct_folders from mlcomp.worker.tasks import execute_by_id from mlcomp.utils.misc import memory, disk, get_username, \ get_default_network_interface, now from mlcomp.server.back.create_dags import dag_standard, dag_pipe _session = Session.create_session(key=__name__) def _dag(config: str, debug: bool = False, control_reqs=True, params: Tuple[str] = ()): logger = create_logger(_session, name='_dag') logger.info('started', ComponentType.Client) config_text = open(config, 'r').read() config_parsed = yaml_load(config_text) params = dict_from_list_str(params) config_parsed = merge_dicts_smart(config_parsed, params) config_text = yaml_dump(config_parsed)
import socket from kaggle.models import DatasetNewRequest from mlcomp.db.core import Session from mlcomp.db.enums import ComponentType from mlcomp.db.providers import ModelProvider from mlcomp.worker.executors.base.equation import Equation from mlcomp.worker.executors.base.executor import Executor from mlcomp.utils.logging import create_logger from mlcomp.utils.config import Config try: from kaggle import api except OSError: logger = create_logger(Session.create_session(), __name__) logger.warning( 'Could not find kaggle.json. ' 'Kaggle executors can not be used', ComponentType.Worker, socket.gethostname()) class DownloadType(Enum): Kaggle = 0 Link = 1 @Executor.register class Download(Executor): def __init__(self, output: str,
def downgrade(migrate_engine):
    """Revert migration 002: delete every ReportLayout row."""
    db_session = Session.create_session(connection_string=migrate_engine.url)
    provider = ReportLayoutProvider(db_session)

    layouts = provider.session.query(ReportLayout)
    layouts.delete(synchronize_session=False)
    provider.session.commit()
DagProvider, DagStorageProvider, TaskProvider, LogProvider, StepProvider, \ FileProvider, AuxiliaryProvider from mlcomp.db.report_info import ReportLayoutInfo from mlcomp.server.back.supervisor import register_supervisor from mlcomp.utils.logging import create_logger from mlcomp.utils.io import from_module_path, zip_folder from mlcomp.server.back.create_dags import dag_model_add, dag_model_start from mlcomp.utils.misc import to_snake, now from mlcomp.db.models import Model, Report, ReportLayout, Task from mlcomp.utils.io import yaml_load, yaml_dump from mlcomp.worker.storage import Storage app = Flask(__name__) CORS(app) _read_session = Session.create_session(key='server.read') _write_session = Session.create_session(key='server.write') logger = create_logger(_write_session, __name__) @app.route('/', defaults={'path': ''}, methods=['GET']) @app.route('/<path:path>', methods=['GET']) def send_static(path): file = 'index.html' if '.' in path: file = path module_path = from_module_path(__file__, f'../front/dist/mlcomp/') return send_from_directory(module_path, file)