Example #1
    def build(self):
        try:
            # if self.fast_check():
            #     return

            self.auxiliary = {'time': now()}

            self.create_base()

            self.process_stop_tasks()

            self.process_start_dags()

            self.process_parent_tasks()

            self.load_tasks()

            self.load_computers()

            self.process_tasks()

            self.write_auxiliary()

        except ObjectDeletedError:
            pass
        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup(key='SupervisorBuilder')
                self.session = Session.create_session(key='SupervisorBuilder')
                self.logger = create_logger(self.session, 'SupervisorBuilder')

            self.logger.error(traceback.format_exc(), ComponentType.Supervisor)
Example #2
    def build(self):
        try:
            self.create_base()

            self.check_status()

            self.change_status()

            self.download()

            self.create_executor()

            self.execute()

        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup(key='ExecuteBuilder')
                self.session = Session.create_session(key='ExecuteBuilder')
                self.logger = create_logger(self.session, 'ExecuteBuilder')

            step = self.executor.step.id if \
                (self.executor and self.executor.step) else None

            self.error(traceback.format_exc(), step)
            self.provider.change_status(self.task, TaskStatus.Failed)
            raise e
        finally:
            if app.current_task:
                app.current_task.update_state(state=states.SUCCESS)
                app.close()

            if self.exit:
                # noinspection PyProtectedMember
                os._exit(0)
Example #3
    def decorated(*args, **kwargs):
        global _session
        try:
            f(*args, **kwargs)
        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup(key=__name__)
                _session = Session.create_session(key=__name__)
            raise e
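
Only the inner decorated function is shown in this fragment. Below is a minimal sketch of the enclosing decorator it presumably belongs to; the outer name handle_session_errors and the use of functools.wraps are assumptions, not taken from the source:

import functools

def handle_session_errors(f):  # hypothetical name for the enclosing decorator
    @functools.wraps(f)
    def decorated(*args, **kwargs):
        # body as in the fragment above: recreate the module-level _session
        # when Session.sqlalchemy_error(e) is true, then re-raise
        ...

    return decorated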
Example #4
    def sync(self):
        hostname = socket.gethostname()
        try:
            provider = ComputerProvider(self.session)
            task_synced_provider = TaskSyncedProvider(self.session)

            computer = provider.by_name(hostname)
            sync_start = now()

            if FILE_SYNC_INTERVAL == 0:
                time.sleep(1)
            else:
                computers = provider.all_with_last_activtiy()
                computers = [
                    c for c in computers
                    if (now() - c.last_activity).total_seconds() < 10
                ]
                computers_names = {c.name for c in computers}

                for c, project, tasks in task_synced_provider.for_computer(
                        computer.name):
                    if c.name not in computers_names:
                        self.logger.info(
                            f'Computer = {c.name} '
                            f'is offline. Can not sync',
                            ComponentType.WorkerSupervisor, hostname)
                        continue

                    if c.syncing_computer:
                        continue

                    excluded = list(map(str,
                                        yaml_load(project.ignore_folders)))
                    folders_excluded = [[join('data', project.name), excluded],
                                        [join('models', project.name), []]]

                    computer.syncing_computer = c.name
                    provider.update()
                    sync_directed(self.session, c, computer, folders_excluded)

                    for t in tasks:
                        task_synced_provider.add(
                            TaskSynced(computer=computer.name, task=t.id))

                    time.sleep(FILE_SYNC_INTERVAL)

            computer.last_synced = sync_start
            computer.syncing_computer = None
            provider.update()
        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup('FileSync')
                self.session = Session.create_session(key='FileSync')
                self.logger = create_logger(self.session, 'FileSync')

            self.logger.error(traceback.format_exc(),
                              ComponentType.WorkerSupervisor, hostname)
Example #5
    def process_error(self, e: Exception):
        if Session.sqlalchemy_error(e):
            Session.cleanup('FileSync')
            self.session = Session.create_session(key='FileSync')
            self.logger = create_logger(self.session, 'FileSync')

        hostname = socket.gethostname()
        self.logger.error(
            traceback.format_exc(), ComponentType.WorkerSupervisor,
            hostname
        )
Example #6
    def wrapper():
        try:
            f(wrapper_vars['session'], wrapper_vars['logger'])
        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup(name)

                wrapper_vars['session'] = Session.create_session(key=name)
                wrapper_vars['logger'] = create_logger(wrapper_vars['session'],
                                                       name)

            wrapper_vars['logger'].error(traceback.format_exc(),
                                         ComponentType.WorkerSupervisor,
                                         hostname)
Example #7
def stop(logger, session: Session, task: Task, dag: Dag):
    provider = TaskProvider(session)
    if task.status > TaskStatus.InProgress.value:
        return task.status

    status = TaskStatus.Stopped
    try:
        if task.status != TaskStatus.NotRan.value:
            app.control.revoke(task.celery_id, terminate=True)
        else:
            status = TaskStatus.Skipped
    except Exception as e:
        if Session.sqlalchemy_error(e):
            try:
                logger.error(traceback.format_exc(), ComponentType.API)
            except Exception:
                pass
            raise
        logger.error(traceback.format_exc(), ComponentType.API)
    finally:
        if task.pid:
            queue = f'{task.computer_assigned}_' \
                    f'{dag.docker_img or "default"}_supervisor'
            kill.apply_async((task.pid, ), queue=queue, retry=False)

            additional_info = yaml_load(task.additional_info)
            for p in additional_info.get('child_processes', []):
                kill.apply_async((p, ), queue=queue, retry=False)
        provider.change_status(task, status)

    return task.status
Example #8
File: base.py Project: xyuan/mlcomp
    def __init__(self, session: Session = None):
        if session is None:
            session = Session.create_session()
        self._session = session
        self.serializer = Serializer(date_format=self.date_format,
                                     datetime_format=self.datetime_format,
                                     time_format=self.time_format)
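
Because session defaults to None and falls back to Session.create_session(), providers built on this base class can be constructed without an explicit session. A minimal usage sketch; treating TaskProvider as a subclass of this base is an assumption, and the key 'my-component' is illustrative only:

from mlcomp.db.providers import TaskProvider

# let the base __init__ create a default session
provider = TaskProvider()

# or share one explicitly created session between several providers
session = Session.create_session(key='my-component')
provider = TaskProvider(session)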
Example #9
def session():
    if ROOT_FOLDER:
        shutil.rmtree(ROOT_FOLDER)
        reload(mlcomp)

    migrate()
    res = Session.create_session()
    yield res
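
The yield makes this read like a pytest fixture that rebuilds the database and hands out a fresh session. A hedged usage sketch; registering the generator with @pytest.fixture and the test body are assumptions, not shown in the source:

from mlcomp.db.providers import ComputerProvider

def test_provider_smoke(session):
    # 'session' is injected by the fixture above (assumed to carry @pytest.fixture)
    assert ComputerProvider(session) is not None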
Example #10
    def __init__(self):
        self.session = Session.create_session(key='SupervisorBuilder')
        self.logger = create_logger(self.session, 'SupervisorBuilder')
        self.provider = None
        self.computer_provider = None
        self.docker_provider = None
        self.auxiliary_provider = None
        self.dag_provider = None
        self.queues = None
        self.not_ran_tasks = None
        self.dep_status = None
        self.computers = None
        self.auxiliary = {}
Example #11
def find_imports(path: str,
                 files: List[str] = None,
                 exclude_patterns: List[str] = None,
                 encoding='utf-8'):
    res = []
    raw_imports = []
    files = files if files is not None \
        else glob(os.path.join(path, '**', '*.py'), recursive=True)

    exclude_patterns = exclude_patterns \
        if exclude_patterns is not None else []
    spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern,
                                        exclude_patterns)

    for file in files:
        if not file.endswith('.py'):
            continue
        file_rel = os.path.relpath(file, path)
        if spec.match_file(file_rel):
            continue

        with open(file, 'r', encoding=encoding) as f:
            content = f.read()
            try:
                tree = ast.parse(content)
                for node in ast.walk(tree):
                    if isinstance(node, ast.Import):
                        for subnode in node.names:
                            raw_imports.append((subnode.name, file_rel))
                    elif isinstance(node, ast.ImportFrom):
                        raw_imports.append((node.module, file_rel))
            except Exception as exc:
                logger = create_logger(Session.create_session(), __name__)
                logger.error('Failed on file: %s' % file_rel)
                raise exc

    for lib, file in raw_imports:
        name = lib.split('.')[0]
        try:
            if name in _mapping:
                name = _mapping[name]

            version = pkg_resources.get_distribution(name).version
            res.append((name, version))
        except Exception:
            pass

    return res
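
A usage sketch of find_imports as defined above; the scan path and exclude patterns are illustrative only:

# collect (package, installed version) pairs for every import found under the path
requirements = find_imports('.', exclude_patterns=['tests/*', 'docs/*'])

for name, version in sorted(set(requirements)):
    print(f'{name}=={version}')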
Example #12
    def __init__(self, id: int, repeat_count: int = 1, exit=True):
        self.session = Session.create_session(key='ExecuteBuilder')
        self.id = id
        self.repeat_count = repeat_count
        self.logger = create_logger(self.session, 'ExecuteBuilder')
        self.exit = exit

        self.provider = None
        self.library_provider = None
        self.storage = None
        self.task = None
        self.dag = None
        self.executor = None
        self.hostname = None
        self.docker_img = None
        self.worker_index = None
        self.queue_personal = None
        self.config = None
        self.executor_type = None
Example #13
def upgrade(migrate_engine):
    folder = os.path.dirname(__file__)
    session = Session.create_session(connection_string=migrate_engine.url)
    provider = ReportLayoutProvider(session)

    try:
        files = os.path.join(folder, '002', 'report_layout', '*.yml')
        for path in glob(files):
            name = str(os.path.basename(path).split('.')[0])
            text = open(path).read()
            provider.add(ReportLayout(name=name,
                                      content=text,
                                      last_modified=now()),
                         commit=False)

        provider.commit()
    except Exception:
        provider.rollback()
        raise
Example #14
def error_handler(f):
    name = f.__name__
    wrapper_vars = {'session': Session.create_session(key=name)}
    wrapper_vars['logger'] = create_logger(wrapper_vars['session'], name)

    hostname = socket.gethostname()

    def wrapper():
        try:
            f(wrapper_vars['session'], wrapper_vars['logger'])
        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup(name)

                wrapper_vars['session'] = Session.create_session(key=name)
                wrapper_vars['logger'] = create_logger(wrapper_vars['session'],
                                                       name)

            wrapper_vars['logger'].error(traceback.format_exc(),
                                         ComponentType.WorkerSupervisor,
                                         hostname)

    return wrapper
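
Judging by the call f(wrapper_vars['session'], wrapper_vars['logger']), the decorated function is expected to take a session and a logger, while the returned wrapper takes no arguments. A minimal usage sketch; the function name heartbeat and its body are hypothetical:

@error_handler
def heartbeat(session, logger):
    # hypothetical body: emit a heartbeat through the logger bound to this handler
    logger.info('heartbeat', ComponentType.WorkerSupervisor, socket.gethostname())

# error_handler returns a zero-argument wrapper, so the decorated name is called directly
heartbeat()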
Example #15
    def decorated(*args, **kwargs):
        global _read_session, _write_session, logger

        success = True
        status = 200
        error = ''

        try:
            res = f(*args, **kwargs)
        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup('server.read')
                Session.cleanup('server.write')

                _read_session = Session.create_session(key='server.read')
                _write_session = Session.create_session(key='server.write')

                logger = create_logger(_write_session, __name__)

            logger.error(
                f'Requested Url: {request.path}\n\n{traceback.format_exc()}',
                ComponentType.API
            )

            error = traceback.format_exc()
            success = False
            status = 500
            res = None

        res = res or {}
        if isinstance(res, Response):
            return res

        res['success'] = success
        res['error'] = error

        return Response(json.dumps(res), status=status)
Example #16
class FileSync:
    session = Session.create_session(key='FileSync')
    logger = create_logger(session, 'FileSync')

    def sync_manual(self, computer: Computer, provider: ComputerProvider):
        """
        button sync was clicked manually
        """
        if not computer.meta:
            return

        meta = yaml_load(computer.meta)
        if 'manual_sync' not in meta:
            return

        manual_sync = meta['manual_sync']

        project_provider = ProjectProvider(self.session)
        docker_provider = DockerProvider(self.session)

        dockers = docker_provider.get_online()
        project = project_provider.by_id(manual_sync['project'])

        for docker in dockers:
            if docker.computer == computer.name:
                continue

            source = provider.by_name(docker.computer)
            ignore_folders = [
                [join('models', project.name), []]
            ]
            sync_directed(self.session, target=computer, source=source,
                          ignore_folders=ignore_folders)

        del meta['manual_sync']
        computer.meta = yaml_dump(meta)
        provider.update()

    def sync(self):
        hostname = socket.gethostname()
        try:
            provider = ComputerProvider(self.session)
            task_synced_provider = TaskSyncedProvider(self.session)

            computer = provider.by_name(hostname)
            sync_start = now()

            if FILE_SYNC_INTERVAL == 0:
                time.sleep(1)
            else:
                self.sync_manual(computer, provider)

                computers = provider.all_with_last_activtiy()
                computers = [
                    c for c in computers
                    if (now() - c.last_activity).total_seconds() < 10
                ]
                computers_names = {c.name for c in computers}

                for c, project, tasks in task_synced_provider.for_computer(
                        computer.name):
                    if c.sync_with_this_computer:
                        if c.name not in computers_names:
                            self.logger.info(f'Computer = {c.name} '
                                             f'is offline. Can not sync',
                                             ComponentType.WorkerSupervisor,
                                             hostname)
                            continue

                        if c.syncing_computer:
                            continue

                        ignore_folders = [
                            [join('models', project.name), []]
                        ]

                        computer.syncing_computer = c.name
                        provider.update()

                        sync_directed(self.session, c, computer,
                                      ignore_folders)

                    for t in tasks:
                        task_synced_provider.add(
                            TaskSynced(computer=computer.name, task=t.id)
                        )

                    time.sleep(FILE_SYNC_INTERVAL)

            computer.last_synced = sync_start
            computer.syncing_computer = None
            provider.update()
        except Exception as e:
            if Session.sqlalchemy_error(e):
                Session.cleanup('FileSync')
                self.session = Session.create_session(key='FileSync')
                self.logger = create_logger(self.session, 'FileSync')

            self.logger.error(
                traceback.format_exc(), ComponentType.WorkerSupervisor,
                hostname
            )
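
How the real worker schedules FileSync.sync() is not shown in these examples; a minimal driving loop, stated as an assumption:

file_sync = FileSync()

while True:
    # sync() does its own per-iteration sleeping (time.sleep(1) or FILE_SYNC_INTERVAL)
    file_sync.sync()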
Example #17
    DOCKER_IMG, DOCKER_MAIN, IP, PORT, WORKER_USAGE_INTERVAL, \
    SYNC_WITH_THIS_COMPUTER, CAN_PROCESS_TASKS
from mlcomp.db.core import Session
from mlcomp.db.enums import ComponentType, TaskStatus
from mlcomp.utils.logging import create_logger
from mlcomp.db.providers import DockerProvider, TaskProvider
from mlcomp.utils.schedule import start_schedule
from mlcomp.utils.misc import dict_func, now, disk, get_username, \
    kill_child_processes
from mlcomp.worker.app import app
from mlcomp.db.providers import ComputerProvider
from mlcomp.db.models import ComputerUsage, Computer, Docker
from mlcomp.utils.misc import memory
from mlcomp.worker.sync import FileSync

_session = Session.create_session(key='worker')


@click.group()
def main():
    pass


def error_handler(f):
    name = f.__name__
    wrapper_vars = {'session': Session.create_session(key=name)}
    wrapper_vars['logger'] = create_logger(wrapper_vars['session'], name)

    hostname = socket.gethostname()

    def wrapper():
Example #18
from mlcomp.db.providers import \
    ComputerProvider, \
    TaskProvider, \
    StepProvider, \
    ProjectProvider, DockerProvider
from mlcomp.report import create_report, check_statuses
from mlcomp.utils.config import merge_dicts_smart, dict_from_list_str
from mlcomp.utils.logging import create_logger
from mlcomp.worker.executors.kaggle import Submit
from mlcomp.worker.sync import sync_directed, correct_folders
from mlcomp.worker.tasks import execute_by_id
from mlcomp.utils.misc import memory, disk, get_username, \
    get_default_network_interface, now
from mlcomp.server.back.create_dags import dag_standard, dag_pipe

_session = Session.create_session(key=__name__)


def _dag(config: str,
         debug: bool = False,
         control_reqs=True,
         params: Tuple[str] = ()):
    logger = create_logger(_session, name='_dag')
    logger.info('started', ComponentType.Client)

    config_text = open(config, 'r').read()
    config_parsed = yaml_load(config_text)
    params = dict_from_list_str(params)
    config_parsed = merge_dicts_smart(config_parsed, params)
    config_text = yaml_dump(config_parsed)
Example #19
import socket
from enum import Enum

from kaggle.models import DatasetNewRequest

from mlcomp.db.core import Session
from mlcomp.db.enums import ComponentType
from mlcomp.db.providers import ModelProvider
from mlcomp.worker.executors.base.equation import Equation
from mlcomp.worker.executors.base.executor import Executor
from mlcomp.utils.logging import create_logger
from mlcomp.utils.config import Config

try:
    from kaggle import api
except OSError:
    logger = create_logger(Session.create_session(), __name__)
    logger.warning(
        'Could not find kaggle.json. '
        'Kaggle executors can not be used', ComponentType.Worker,
        socket.gethostname())


class DownloadType(Enum):
    Kaggle = 0
    Link = 1


@Executor.register
class Download(Executor):
    def __init__(self,
                 output: str,
Example #20
def downgrade(migrate_engine):
    session = Session.create_session(connection_string=migrate_engine.url)
    provider = ReportLayoutProvider(session)
    provider.session.query(ReportLayout).delete(synchronize_session=False)
    provider.session.commit()
Example #21
    DagProvider, DagStorageProvider, TaskProvider, LogProvider, StepProvider, \
    FileProvider, AuxiliaryProvider
from mlcomp.db.report_info import ReportLayoutInfo
from mlcomp.server.back.supervisor import register_supervisor
from mlcomp.utils.logging import create_logger
from mlcomp.utils.io import from_module_path, zip_folder
from mlcomp.server.back.create_dags import dag_model_add, dag_model_start
from mlcomp.utils.misc import to_snake, now
from mlcomp.db.models import Model, Report, ReportLayout, Task
from mlcomp.utils.io import yaml_load, yaml_dump
from mlcomp.worker.storage import Storage

app = Flask(__name__)
CORS(app)

_read_session = Session.create_session(key='server.read')
_write_session = Session.create_session(key='server.write')

logger = create_logger(_write_session, __name__)


@app.route('/', defaults={'path': ''}, methods=['GET'])
@app.route('/<path:path>', methods=['GET'])
def send_static(path):
    file = 'index.html'
    if '.' in path:
        file = path

    module_path = from_module_path(__file__, f'../front/dist/mlcomp/')
    return send_from_directory(module_path, file)