Exemple #1
0
def read_config():
    """Read the stored student ID and password from the encrypted ``config`` file.

    The file is read as three consecutive fields: the first 16 bytes (nonce),
    the next 64 bytes (ciphertext) and all remaining bytes (tag). The three
    fields are handed to ``encrypt_utils.decrypt`` and the decrypted payload
    is split on ``split_text`` into exactly two parts.

    :return: dict with the keys ``'name'`` and ``'pass'``.
    :raises ValueError: if the decrypted payload does not split into exactly
        two parts.
    """
    # A context manager guarantees the handle is closed even if a read raises.
    with open('config', 'rb') as rf:
        nonce, ciphertext, tag = [rf.read(x) for x in (16, 64, -1)]
    data = encrypt_utils.decrypt(nonce, ciphertext, tag)
    if not data:
        # NOTE(review): a falsy result triggers an unconditional retry.
        # Presumably decrypt() can succeed on a later attempt (e.g. it prompts
        # for a key); if it is deterministic this recurses until
        # RecursionError -- confirm against encrypt_utils.
        return read_config()
    name, password = data.split(split_text)
    return {'name': name, 'pass': password}
Exemple #2
0
    def get(self, request):
        """Return one page of employees for the HR management view.

        Query parameters (read from ``request.GET``):
          * ``offset`` -- zero-based index of the first employee to return;
            defaults to 0.
          * ``search`` -- space-separated terms; an employee matches when
            ``name_kor`` contains any of the terms (case-insensitive).

        Responses:
          * 200 -- ``{"employees": [...], "total_employees": <int>}`` with at
            most 6 employees.
          * 400 -- ``OFFSET_OUT_OF_RANGE`` when the offset is negative or
            greater than the number of matching employees, or
            ``VALUE_ERROR:...`` when a ``ValueError`` is raised (e.g. a
            non-integer offset).
          * 403 -- ``NO_AUTHORIZATION`` when the acting employee's auth id is
            not 1.
        """
        try:
            # NOTE(review): the acting employee is hard-coded to id 1, so the
            # authorization check below does not depend on the caller. The
            # original left `request.employee.id` / `request.employee.auth`
            # commented out as the intended source -- confirm and restore.
            admin_id = 1
            admin_auth = Employee.objects.get(id=admin_id).auth.id

            # if the user is not an admin then...
            if admin_auth != 1:
                return JsonResponse({"message": "NO_AUTHORIZATION"},
                                    status=403)

            # pagination
            limit = 6
            queries = dict(request.GET)

            if queries.get('offset'):
                offset = int(queries['offset'][0])
            else:
                offset = 0

            # search filter by name_kor
            if queries.get('search'):
                search_list = queries.get('search')[0].split(' ')
                conditions = [Q(name_kor__icontains=name)
                              for name in search_list]
                employee_list = Employee.objects.filter(reduce(OR, conditions))
            else:
                employee_list = Employee.objects.all()

            # Materialize once; both the total count and the page come from it.
            employees = list(employee_list)
            total_employees = len(employees)

            # The range check must run after the employee list exists (it
            # previously referenced the list before assignment). Negative
            # offsets are rejected too, since they would slice from the end.
            if offset < 0 or offset > total_employees:
                return JsonResponse({'message': 'OFFSET_OUT_OF_RANGE'},
                                    status=400)

            hr_mgmt_page_list = employees[offset:offset + limit]

            # loop-invariant: the same secret decrypts every row
            secret = my_settings.SECRET.get('random')

            # 'no' is the 1-based position of the employee within the page.
            # NOTE(review): EmployeeDetail is fetched once per row.
            returning_list = [{
                'id': hr.id,
                'no': position,
                'name': hr.name_kor,
                'nickname': hr.nickname,
                'mobile': hr.mobile,
                # only the first 6 characters of the decrypted rrn are exposed
                'dob': encrypt_utils.decrypt(hr.rrn,
                                             secret).decode('utf-8')[:6],
                'email': hr.company_email,
                'joined_at':
                EmployeeDetail.objects.get(employee_id=hr.id).joined_at
            } for position, hr in enumerate(hr_mgmt_page_list, start=1)]

            return JsonResponse(
                {
                    "employees": returning_list,
                    "total_employees": total_employees
                },
                status=200)

        except ValueError as e:
            return JsonResponse({"message": f"VALUE_ERROR:{e}"}, status=400)
Exemple #3
0
 def decryption(info):
     """Decrypt the ``info`` field of ``target_employee`` and return it as text.

     The field is decrypted with the ``'random'`` entry of
     ``my_settings.SECRET`` and the resulting bytes are decoded as UTF-8.
     """
     encrypted_value = target_employee[info]
     secret_key = my_settings.SECRET.get('random')
     plaintext = encrypt_utils.decrypt(encrypted_value, secret_key)
     return plaintext.decode('utf-8')
Exemple #4
0
def execute_actor(actor_id,
                  worker_id,
                  execution_id,
                  image,
                  msg,
                  user=None,
                  d=None,
                  privileged=False,
                  mounts=None,
                  leave_container=False,
                  fifo_host_path=None,
                  socket_host_path=None,
                  mem_limit=None,
                  max_cpus=None,
                  tenant=None):
    """
    Creates and runs an actor container and supervises the execution, collecting statistics about resource consumption
    from the Docker daemon.

    :param actor_id: the dbid of the actor; for updating worker status
    :param worker_id: the worker id; also for updating worker status
    :param execution_id: the id of the execution.
    :param image: the actor's image; worker must have already downloaded this image to the local docker registry.
    :param msg: the message being passed to the actor.
    :param user: string in the form {uid}:{gid} representing the uid and gid to run the command as.
    :param d: dictionary representing the environment to instantiate within the actor container. When omitted a fresh
    dict is created per call. A dict passed by the caller is updated in place with `_actor_configs` (and `MSG` when no
    FIFO is used).
    :param privileged: whether this actor is "privileged"; i.e., its container should run in privileged mode with the
    docker daemon mounted.
    :param mounts: list of dictionaries representing the mounts to add; each dictionary mount should have 3 keys:
    host_path, container_path and format (which should have value 'ro' or 'rw'). Defaults to no mounts.
    :param leave_container: if True, the actor container is not removed after the execution finishes.
    :param fifo_host_path: If not None, a string representing a path on the host to a FIFO used for passing binary data to the actor.
    :param socket_host_path: If not None, a string representing a path on the host to a socket used for collecting results from the actor.
    :param mem_limit: The maximum amount of memory the Actor container can use; should be the same format as the --memory Docker flag.
    The string '-1' means unlimited.
    :param max_cpus: The maximum number of CPUs each actor will have available to them. Does not guarantee these CPU resources; serves as upper bound.
    -1 or a value that cannot be converted to int means unlimited.
    :param tenant: the tenant of the actor; used to resolve the actor's display id, aliases and when storing logs.
    :return: tuple (result, logs, container_state, exit_code, start_time) - `result`: dict of statistics about
    resource consumption with keys 'cpu', 'io' and 'runtime'; `logs`: output from docker logs; `container_state`: the
    'State' object from docker inspect, or {'unavailable': True} when it could not be determined; `exit_code`: the
    container exit code or 'undetermined'; `start_time`: UTC time stamp taken right before the container was started.
    :raises DockerStartContainerError: if the FIFO cannot be written, the results socket cannot be set up, or the
    container cannot be started.
    """
    logger.debug(
        f"top of execute_actor(); actor_id: {actor_id}; tenant: {tenant} (worker {worker_id};{execution_id})"
    )

    # Defaults are created per call: the environment dict is mutated below, so a shared default object would leak
    # MSG and _actor_configs from one execution into the next.
    if d is None:
        d = {}
    if mounts is None:
        mounts = []

    # get any configs for this actor
    actor_configs = {}
    config_list = []
    # list of all aliases for the actor
    alias_list = []
    # the actor_id passed in is the dbid
    actor_human_id = Actor.get_display_id(tenant, actor_id)

    for alias in alias_store.items():
        logger.debug(f"checking alias: {alias}")
        if actor_human_id == alias['actor_id'] and tenant == alias['tenant']:
            alias_list.append(alias['alias'])
    logger.debug(f"alias_list: {alias_list}")
    # loop through configs to look for any that apply to this actor
    for config in configs_store.items():
        # first look for the actor_id itself
        if actor_human_id in config['actors']:
            logger.debug(f"actor_id matched; adding config {config}")
            config_list.append(config)
        else:
            logger.debug("actor id did not match; checking aliases...")
            # if we didn't find the actor_id, look for any of its aliases
            for alias in alias_list:
                if alias in config['actors']:
                    # as soon as we find it, append and get out (only want to add once)
                    logger.debug(
                        f"alias {alias} matched; adding config: {config}")
                    config_list.append(config)
                    break
    logger.debug(f"got config_list: {config_list}")
    # for each config, need to check for secrets and decrypt ---
    for config in config_list:
        logger.debug('checking for secrets')
        try:
            if config['is_secret']:
                value = encrypt_utils.decrypt(config['value'])
                actor_configs[config['name']] = value
            else:
                actor_configs[config['name']] = config['value']

        except Exception as e:
            # best effort: a config that cannot be processed is skipped, not fatal
            logger.error(
                f'something went wrong checking is_secret for config: {config}; e: {e}'
            )

    logger.debug(f"final actor configs: {actor_configs}")
    d['_actor_configs'] = actor_configs

    # initially set the global force_quit variable to False
    globals.force_quit = False

    # initial stats object, environment, binds and volumes
    result = {'cpu': 0, 'io': 0, 'runtime': 0}

    # instantiate docker client
    cli = docker.APIClient(base_url=dd, version="auto")

    # don't try to pass binary messages through the environment as these can cause
    # broken pipe errors. the binary data will be passed through the FIFO momentarily.
    if not fifo_host_path:
        d['MSG'] = msg
    binds = {}
    volumes = []

    # if container is privileged, mount the docker daemon so that additional
    # containers can be started.
    logger.debug("privileged: {};(worker {};{})".format(
        privileged, worker_id, execution_id))
    if privileged:
        binds = {
            '/var/run/docker.sock': {
                'bind': '/var/run/docker.sock',
                'ro': False
            }
        }
        volumes = ['/var/run/docker.sock']

    # add a bind key and dictionary as well as a volume for each mount
    for m in mounts:
        binds[m.get('host_path')] = {
            'bind': m.get('container_path'),
            'ro': m.get('format') == 'ro'
        }
        volumes.append(m.get('host_path'))

    # mem_limit
    # -1 => unlimited memory
    if mem_limit == '-1':
        mem_limit = None

    # max_cpus
    # anything that cannot be converted to int (including None) means "no limit"
    try:
        max_cpus = int(max_cpus)
    except (TypeError, ValueError):
        max_cpus = None
    # -1 => unlimited cpus
    if max_cpus == -1:
        max_cpus = None

    host_config = cli.create_host_config(binds=binds,
                                         privileged=privileged,
                                         mem_limit=mem_limit,
                                         nano_cpus=max_cpus)
    logger.debug("host_config object created by (worker {};{}).".format(
        worker_id, execution_id))

    # write binary data to FIFO if it exists:
    fifo = None
    if fifo_host_path:
        try:
            fifo = os.open(fifo_host_path, os.O_RDWR)
            os.write(fifo, msg)
        except Exception as e:
            logger.error(
                "Error writing the FIFO. Exception: {};(worker {};{})".format(
                    e, worker_id, execution_id))
            # if open succeeded but the write failed, release the descriptor before bailing out
            if fifo is not None:
                try:
                    os.close(fifo)
                except OSError as close_error:
                    logger.debug(
                        f"got Exception trying to close the fifo; e: {close_error}"
                    )
            os.remove(fifo_host_path)
            raise DockerStartContainerError("Error writing to fifo: {}; "
                                            "(worker {};{})".format(
                                                e, worker_id, execution_id))

    # set up results socket -----------------------
    # make sure socket doesn't already exist:
    try:
        os.unlink(socket_host_path)
    except OSError as e:
        # a failed unlink is only a problem if the path is still there afterwards
        if os.path.exists(socket_host_path):
            logger.error(
                "socket at {} already exists; Exception: {}; (worker {};{})".
                format(socket_host_path, e, worker_id, execution_id))
            raise DockerStartContainerError(
                "Got an OSError trying to create the results docket; "
                "exception: {}".format(e))

    # use retry logic since, when the compute node is under load, we see errors initially trying to create the socket
    # server object.
    keep_trying = True
    count = 0
    server = None
    while keep_trying and count < 10:
        keep_trying = False
        count = count + 1
        # release the socket left over from a previous failed attempt so that retries do not leak descriptors
        if server is not None:
            try:
                server.close()
            except OSError as close_error:
                logger.debug(
                    f"got Exception closing results socket before retry; e: {close_error}"
                )
            server = None
        try:
            server = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM)
        except Exception as e:
            keep_trying = True
            logger.info("Could not instantiate socket at {}. "
                        "Count: {}; Will keep trying. "
                        "Exception: {}; type: {}; (worker {};{})".format(
                            socket_host_path, count, e, type(e), worker_id,
                            execution_id))
            # without a socket object there is nothing to bind or configure on this attempt
            continue
        try:
            server.bind(socket_host_path)
        except Exception as e:
            keep_trying = True
            logger.info("Could not bind socket at {}. "
                        "Count: {}; Will keep trying. "
                        "Exception: {}; type: {}; (worker {};{})".format(
                            socket_host_path, count, e, type(e), worker_id,
                            execution_id))
        try:
            os.chmod(socket_host_path, 0o777)
            logger.debug(
                "results socket permissions set to 777. socket_host_path: {}".
                format(socket_host_path))
        except Exception as e:
            # use a separate name so the `msg` parameter is not clobbered
            err_msg = f"Got exception trying to set permissions on the results socket. Not sure what to do. e: {e}"
            logger.error(err_msg)
            # for now, we'll just swallow it but this is really a TODO.

        try:
            server.settimeout(RESULTS_SOCKET_TIMEOUT)
        except Exception as e:
            keep_trying = True
            logger.info("Could not set timeout for socket at {}. "
                        "Count: {}; Will keep trying. "
                        "Exception: {}; type: {}; (worker {};{})".format(
                            socket_host_path, count, e, type(e), worker_id,
                            execution_id))
    if server is None:
        err_msg = "Failed to instantiate results socket. " \
                  "Abaco compute host could be overloaded. (worker {};{})".format(worker_id, execution_id)
        logger.error(err_msg)
        raise DockerStartContainerError(err_msg)

    logger.debug(
        "results socket server instantiated. path: {} (worker {};{})".format(
            socket_host_path, worker_id, execution_id))

    # instantiate the results channel:
    results_ch = ExecutionResultsChannel(actor_id, execution_id)

    # create and start the container
    logger.debug("Final container environment: {};(worker {};{})".format(
        d, worker_id, execution_id))
    logger.debug(
        "Final binds: {} and host_config: {} for the container.(worker {};{})".
        format(binds, host_config, worker_id, execution_id))
    container = cli.create_container(image=image,
                                     environment=d,
                                     user=user,
                                     volumes=volumes,
                                     host_config=host_config)
    # get the UTC time stamp
    start_time = get_current_utc_time()
    # start the timer to track total execution time.
    start = timeit.default_timer()
    logger.debug("right before cli.start: {}; container id: {}; "
                 "(worker {};{})".format(start, container.get('Id'), worker_id,
                                         execution_id))
    try:
        cli.start(container=container.get('Id'))
    except Exception as e:
        # if there was an error starting the container, user will need to debug
        logger.info(
            "Got exception starting actor container: {}; (worker {};{})".
            format(e, worker_id, execution_id))
        raise DockerStartContainerError(
            "Could not start container {}. Exception {}".format(
                container.get('Id'), str(e)))

    # local bool tracking whether the actor container is still running
    running = True
    Execution.update_status(actor_id, execution_id, RUNNING)

    logger.debug("right before creating stats_cli: {}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))
    # create a separate cli for checking stats objects since these should be fast and we don't want to wait
    stats_cli = docker.APIClient(base_url=dd, timeout=1, version="auto")
    logger.debug("right after creating stats_cli: {}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))

    # under load, we can see UnixHTTPConnectionPool ReadTimeout's trying to create the stats_obj
    # so here we are trying up to 3 times to create the stats object for a possible total of 3s
    # timeouts
    ct = 0
    stats_obj = None
    logs = None
    while ct < 3:
        try:
            stats_obj = stats_cli.stats(container=container.get('Id'),
                                        decode=True)
            break
        except ReadTimeout:
            ct += 1
        except Exception as e:
            logger.error(
                "Unexpected exception creating stats_obj. Exception: {}; (worker {};{})"
                .format(e, worker_id, execution_id))
            # count this attempt as well; otherwise a persistent error would loop here forever.
            # UPDATE - 07-2018: under load, errors can occur attempting to create the stats object.
            # the container could still be running; the container status is checked explicitly
            # in the main loop below.
            ct += 1
    logger.debug(
        "right after attempting to create stats_obj: {}; (worker {};{})".
        format(timeit.default_timer(), worker_id, execution_id))
    # a counter of the number of iterations through the main "running" loop;
    # this counter is used to determine when less frequent actions, such as log aggregation, need to run.
    loop_idx = 0
    log_ex = Actor.get_actor_log_ttl(actor_id)
    while running and not globals.force_quit:
        loop_idx += 1
        logger.debug(
            "top of while running loop; loop_idx: {}".format(loop_idx))
        datagram = None
        stats = None
        try:
            datagram = server.recv(MAX_RESULT_FRAME_SIZE)
        except socket.timeout:
            # no result arrived within the socket timeout; carry on with stats/status checks
            pass
        except Exception as e:
            logger.error(
                "got exception from server.recv: {}; (worker {};{})".format(
                    e, worker_id, execution_id))
        logger.debug(
            "right after try/except datagram block: {}; (worker {};{})".format(
                timeit.default_timer(), worker_id, execution_id))
        if datagram:
            try:
                results_ch.put(datagram)
            except Exception as e:
                logger.error(
                    "Error trying to put datagram on results channel. "
                    "Exception: {}; (worker {};{})".format(
                        e, worker_id, execution_id))
        logger.debug("right after results ch.put: {}; (worker {};{})".format(
            timeit.default_timer(), worker_id, execution_id))

        # only try to collect stats if we have a stats_obj:
        if stats_obj:
            logger.debug(
                "we have a stats_obj; trying to collect stats. (worker {};{})".
                format(worker_id, execution_id))
            try:
                logger.debug(
                    "waiting on a stats obj: {}; (worker {};{})".format(
                        timeit.default_timer(), worker_id, execution_id))
                stats = next(stats_obj)
                logger.debug("got the stats obj: {}; (worker {};{})".format(
                    timeit.default_timer(), worker_id, execution_id))
            except StopIteration:
                # we have read the last stats object - no need for processing
                logger.debug(
                    "Got StopIteration; no stats object. (worker {};{})".
                    format(worker_id, execution_id))
            except ReadTimeoutError:
                # this is a ReadTimeoutError from docker, not requests.
                logger.info(
                    "next(stats) just timed out: {}; (worker {};{})".format(
                        timeit.default_timer(), worker_id, execution_id))
                # UPDATE - 07-2018: under load, a ReadTimeoutError from the attempt to get a stats object
                # does NOT imply the container has stopped; we need to explicitly check the container status
                # to be sure.

        # if we got a stats object, add it to the results; it is possible stats collection timed out and the object
        # is None
        if stats:
            logger.debug("adding stats to results; (worker {};{})".format(
                worker_id, execution_id))
            try:
                result['cpu'] += stats['cpu_stats']['cpu_usage']['total_usage']
            except KeyError as e:
                logger.info(
                    "Got a KeyError trying to fetch the cpu object: {}; "
                    "(worker {};{})".format(e, worker_id, execution_id))
            try:
                result['io'] += stats['networks']['eth0']['rx_bytes']
            except KeyError as e:
                logger.info(
                    "Got KeyError exception trying to grab the io object. "
                    "running: {}; Exception: {}; (worker {};{})".format(
                        running, e, worker_id, execution_id))

        # grab the logs every 5th iteration --
        if loop_idx % 5 == 0:
            logs = cli.logs(container.get('Id'))
            Execution.set_logs(execution_id, logs, actor_id, tenant, worker_id,
                               log_ex)
            # reset so the final, complete logs are fetched after the loop
            logs = None

        # checking the container status to see if it is still running ----
        if running:
            logger.debug(
                "about to check container status: {}; (worker {};{})".format(
                    timeit.default_timer(), worker_id, execution_id))
            # we need to wait for the container id to be available
            i = 0
            c = None
            while i < 10:
                try:
                    c = cli.containers(all=True,
                                       filters={'id': container.get('Id')})[0]
                    break
                except IndexError:
                    logger.error(
                        "Got an IndexError trying to get the container object. "
                        "(worker {};{})".format(worker_id, execution_id))
                    time.sleep(0.1)
                    i += 1
            logger.debug(
                "done checking status: {}; i: {}; (worker {};{})".format(
                    timeit.default_timer(), i, worker_id, execution_id))
            # if we were never able to get the container object, we need to stop processing and kill this
            # worker; the docker daemon could be under heavy load, but we need to not launch another
            # actor container with this worker, because the existing container may still be running,
            if i == 10 or not c:
                # we'll try to stop the container
                logger.error(
                    "Never could retrieve the container object! Attempting to stop container; "
                    "container id: {}; (worker {};{})".format(
                        container.get('Id'), worker_id, execution_id))
                # stop_container could raise an exception - if so, we let it pass up and have the worker
                # shut itself down.
                stop_container(cli, container.get('Id'))
                logger.info("container {} stopped. (worker {};{})".format(
                    container.get('Id'), worker_id, execution_id))

                # if we were able to stop the container, we can set running to False and keep the
                # worker running
                running = False
                continue
            state = c.get('State')
            if not state == 'running':
                logger.debug(
                    "container finished, final state: {}; (worker {};{})".
                    format(state, worker_id, execution_id))
                running = False
                continue
            else:
                # container still running; check if a force_quit has been sent OR
                # we are beyond the max_run_time
                # NOTE(review): max_run_time is not defined in this function; presumably a module-level
                # setting -- confirm.
                runtime = timeit.default_timer() - start
                if globals.force_quit or (max_run_time > 0
                                          and max_run_time < runtime):
                    logs = cli.logs(container.get('Id'))
                    if globals.force_quit:
                        logger.info(
                            "issuing force quit: {}; (worker {};{})".format(
                                timeit.default_timer(), worker_id,
                                execution_id))
                    else:
                        logger.info(
                            "hit runtime limit: {}; (worker {};{})".format(
                                timeit.default_timer(), worker_id,
                                execution_id))
                    cli.stop(container.get('Id'))
                    running = False
            logger.debug(
                "right after checking container state: {}; (worker {};{})".
                format(timeit.default_timer(), worker_id, execution_id))
    logger.info("container stopped:{}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))
    stop = timeit.default_timer()
    globals.force_quit = False

    # get info from container execution, including exit code; Exceptions from any of these commands
    # should not cause the worker to shutdown or prevent starting subsequent actor containers.
    exit_code = 'undetermined'
    # default used when the container cannot be inspected at all; without it the return statement
    # below would fail on an unbound name after an APIError.
    container_state = {'unavailable': True}
    try:
        container_info = cli.inspect_container(container.get('Id'))
        try:
            container_state = container_info['State']
            try:
                exit_code = container_state['ExitCode']
            except KeyError as e:
                logger.error("Could not determine ExitCode for container {}. "
                             "Exception: {}; (worker {};{})".format(
                                 container.get('Id'), e, worker_id,
                                 execution_id))
                exit_code = 'undetermined'
            # Converting ISO8601 timestamps to datetime objects
            try:
                # Slicing to 23 to account for accuracy up to milliseconds and replace to get rid of ISO 8601 'Z'
                startedat_ISO = container_state['StartedAt'].replace('Z',
                                                                     '')[:23]
                finishedat_ISO = container_state['FinishedAt'].replace(
                    'Z', '')[:23]

                container_state['StartedAt'] = datetime.datetime.strptime(
                    startedat_ISO, "%Y-%m-%dT%H:%M:%S.%f")
                container_state['FinishedAt'] = datetime.datetime.strptime(
                    finishedat_ISO, "%Y-%m-%dT%H:%M:%S.%f")
            except Exception as e:
                # the two fragments are one message; passing them as separate arguments would make the
                # second a %-format argument and break the log record.
                logger.error(
                    f"Datetime conversion failed for container {container.get('Id')}. "
                    f"Exception: {e}; (worker {worker_id};{execution_id})")
                container_state = {'unavailable': True}
        except KeyError as e:
            logger.error(
                f"Could not determine final state for container {container.get('Id')}. "
                f"Exception: {e}; (worker {worker_id};{execution_id})")
            container_state = {'unavailable': True}
    except docker.errors.APIError as e:
        logger.error(f"Could not inspect container {container.get('Id')}. "
                     f"Exception: {e}; (worker {worker_id};{execution_id})")
    logger.debug(
        "right after getting container_info: {}; (worker {};{})".format(
            timeit.default_timer(), worker_id, execution_id))
    # get logs from container
    if not logs:
        logs = cli.logs(container.get('Id'))
    if not logs:
        # there are issues where container do not have logs associated with them when they should.
        logger.info("Container id {} had NO logs associated with it. "
                    "(worker {};{})".format(container.get('Id'), worker_id,
                                            execution_id))
    logger.debug(
        "right after getting container logs: {}; (worker {};{})".format(
            timeit.default_timer(), worker_id, execution_id))

    # get any additional results from the execution:
    while True:
        datagram = None
        try:
            datagram = server.recv(MAX_RESULT_FRAME_SIZE)
        except socket.timeout:
            # nothing left to read
            break
        except Exception as e:
            logger.error(
                "Got exception from server.recv: {}; (worker {};{})".format(
                    e, worker_id, execution_id))
            # stop draining: unlike the main loop, nothing else terminates this loop, so a persistent
            # socket error would otherwise spin forever.
            break
        if datagram:
            try:
                results_ch.put(datagram)
            except Exception as e:
                logger.error(
                    "Error trying to put datagram on results channel. "
                    "Exception: {}; (worker {};{})".format(
                        e, worker_id, execution_id))
    logger.debug(
        "right after getting last execution results from datagram socket: {}; "
        "(worker {};{})".format(timeit.default_timer(), worker_id,
                                execution_id))
    if socket_host_path:
        server.close()
        os.remove(socket_host_path)
    logger.debug("right after removing socket: {}; (worker {};{})".format(
        timeit.default_timer(), worker_id, execution_id))

    # remove actor container with retrying logic -- check for specific filesystem errors from the docker daemon:
    if not leave_container:
        keep_trying = True
        count = 0
        while keep_trying and count < 10:
            keep_trying = False
            count = count + 1
            try:
                cli.remove_container(container=container)
                logger.info("Actor container removed. (worker {};{})".format(
                    worker_id, execution_id))
            except Exception as e:
                # if the container is already gone we definitely want to quit:
                if 'No such container' in str(e):
                    logger.info("Got 'no such container' exception - quiting. "
                                "Exception: {}; (worker {};{})".format(
                                    e, worker_id, execution_id))
                    break
                # if we get a resource busy/internal server error from docker, we need to keep trying to remove the
                # container.
                elif 'device or resource busy' in str(
                        e) or 'failed to remove root filesystem' in str(e):
                    logger.error(
                        "Got resource busy/failed to remove filesystem exception trying to remove "
                        "actor container; will keep trying."
                        "Count: {}; Exception: {}; (worker {};{})".format(
                            count, e, worker_id, execution_id))
                    time.sleep(1)
                    keep_trying = True
                else:
                    logger.error(
                        "Unexpected exception trying to remove actor container. Giving up."
                        "Exception: {}; type: {}; (worker {};{})".format(
                            e, type(e), worker_id, execution_id))
    else:
        logger.debug("leaving actor container since leave_container was True. "
                     "(worker {};{})".format(worker_id, execution_id))
    logger.debug(
        "right after removing actor container: {}; (worker {};{})".format(
            timeit.default_timer(), worker_id, execution_id))

    if fifo_host_path:
        try:
            os.close(fifo)
            os.remove(fifo_host_path)
        except Exception as e:
            logger.debug(
                f"got Exception trying to clean up fifo_host_path; e: {e}")
    if results_ch:
        # check if the length of the results channel is empty and if so, delete it --
        if len(results_ch._queue._queue) == 0:
            try:
                results_ch.delete()
            except Exception as e:
                logger.warn(
                    f"Got exception trying to delete the results_ch, swallowing it; Exception: {e}"
                )
        else:
            try:
                results_ch.close()
            except Exception as e:
                logger.warn(
                    f"Got exception trying to close the results_ch, swallowing it; Exception: {e}"
                )
    result['runtime'] = int(stop - start)
    logger.debug(
        "right after removing fifo; about to return: {}; (worker {};{})".
        format(timeit.default_timer(), worker_id, execution_id))
    return result, logs, container_state, exit_code, start_time