def stop_arena(unit_id):
    """
    Stop all servers of an arena: the unit-level servers and every workout's servers.

    Arenas have server builds for the unit as well as individual workouts. The unit and
    each of its workouts are flagged as not running in Datastore, and a stop request is
    sent for every instance returned by the filter ``name = <id>*``.

    :param unit_id: The build ID of the arena
    :return: None
    """
    # First stop the unit's servers
    unit_servers = compute.instances().list(
        project=project, zone=zone,
        filter='name = {}*'.format(unit_id)).execute()
    unit = ds_client.get(ds_client.key('cybergym-unit', unit_id))
    unit['arena']['running'] = False
    ds_client.put(unit)
    g_logger = log_client.logger('arena-actions')
    if 'items' in unit_servers:
        for vm_instance in unit_servers['items']:
            compute.instances().stop(
                project=project, zone=zone,
                instance=vm_instance["name"]).execute()
        # The severity must be a concrete level; the original passed the LOG_LEVELS
        # container itself here.
        g_logger.log_struct(
            {"message": "Stopped servers for arena {}".format(unit_id)},
            severity=LOG_LEVELS.INFO)
    else:
        g_logger.log_struct(
            {"message": "No servers in arena {} to stop".format(unit_id)},
            severity=LOG_LEVELS.WARNING)

    # Then stop the servers of every workout registered on the unit
    for workout_id in unit['workouts']:
        # Each workout logs to a logger named after its own ID
        g_logger = log_client.logger(str(workout_id))
        workout_servers = compute.instances().list(
            project=project, zone=zone,
            filter='name = {}*'.format(workout_id)).execute()
        workout = ds_client.get(ds_client.key('cybergym-workout', workout_id))
        workout['running'] = False
        ds_client.put(workout)
        if 'items' in workout_servers:
            for vm_instance in workout_servers['items']:
                compute.instances().stop(
                    project=project, zone=zone,
                    instance=vm_instance["name"]).execute()
            g_logger.log_struct(
                {
                    "message":
                    "Workout servers stopped for workout {}".format(workout_id)
                },
                severity=LOG_LEVELS.INFO)
        else:
            g_logger.log_struct(
                {
                    "message":
                    "No servers to stop for workout {}".format(workout_id)
                },
                severity=LOG_LEVELS.WARNING)
Ejemplo n.º 2
0
def stop_everything():
    """
    Send a stop request for every instance listed in the configured project and zone,
    then print whether anything was found to stop.
    :return: None
    """
    listing = compute.instances().list(project=project, zone=zone).execute()
    if 'items' not in listing:
        print("No workouts to stop")
        return

    for instance in listing['items']:
        stop_request = compute.instances().stop(
            project=project, zone=zone, instance=instance["name"])
        stop_request.execute()
    print("Workouts stopped")
def stop_everything():
    """
    Send a stop request for every instance listed in the configured project and zone and
    record the outcome on the 'workout-actions' logger.
    :return: None
    """
    g_logger = log_client.logger('workout-actions')
    listing = compute.instances().list(project=project, zone=zone).execute()
    if 'items' not in listing:
        g_logger.log_struct({"message": "No workouts to stop (daily cleanup)"},
                            severity=LOG_LEVELS.WARNING)
        return

    for instance in listing['items']:
        stop_request = compute.instances().stop(
            project=project, zone=zone, instance=instance["name"])
        stop_request.execute()
    g_logger.log_struct({"message": "All machines stopped (daily cleanup)"},
                        severity=LOG_LEVELS.INFO)
Ejemplo n.º 4
0
def stop_workout(workout_id):
    """
    Move a workout from RUNNING to READY in Datastore and send a stop request for each
    instance returned by the filter ``name = <workout_id>*``.
    :param workout_id: The ID of the workout whose servers should be stopped
    :return: None
    """
    name_filter = 'name = {}*'.format(workout_id)
    listing = compute.instances().list(
        project=project, zone=zone, filter=name_filter).execute()

    # Record the state change before touching the instances
    workout_key = ds_client.key('cybergym-workout', workout_id)
    workout = ds_client.get(workout_key)
    state_transition(entity=workout,
                     new_state=BUILD_STATES.READY,
                     existing_state=BUILD_STATES.RUNNING)
    ds_client.put(workout)

    if 'items' not in listing:
        print("No workouts to stop")
        return
    for instance in listing['items']:
        compute.instances().stop(
            project=project, zone=zone, instance=instance["name"]).execute()
    print("Workouts stopped")
def stop_workout(workout_id):
    """
    Stop a workout: transition it from RUNNING to READY, accumulate its runtime, publish a
    STOP action for each of its Datastore servers, and send stop requests for the
    instances returned by the filter ``name = <workout_id>*``.
    :param workout_id: The ID of the workout to stop
    :return: None
    """
    name_filter = 'name = {}*'.format(workout_id)
    listing = compute.instances().list(
        project=project, zone=zone, filter=name_filter).execute()
    workout = ds_client.get(ds_client.key('cybergym-workout', workout_id))
    state_transition(entity=workout,
                     new_state=BUILD_STATES.READY,
                     existing_state=BUILD_STATES.RUNNING)

    # Add the seconds elapsed since 'start_time' to the running total, if a start was recorded
    if 'start_time' in workout:
        began = workout['start_time']
        ended = calendar.timegm(time.gmtime())
        elapsed = int(ended) - int(began)
        already_counted = int(workout['runtime_counter']) if 'runtime_counter' in workout else 0
        workout['runtime_counter'] = already_counted + elapsed
    ds_client.put(workout)

    server_query = ds_client.query(kind='cybergym-server')
    server_query.add_filter("workout", "=", workout_id)
    for server in list(server_query.fetch()):
        # Publish to a server management topic
        topic_name = PUBSUB_TOPICS.MANAGE_SERVER
        publisher = pubsub_v1.PublisherClient()
        publish_future = publisher.publish(
            publisher.topic_path(project, topic_name),
            data=b'Server Build',
            server_name=server['name'],
            action=SERVER_ACTIONS.STOP)
        print(publish_future.result())

    g_logger = log_client.logger(str(workout_id))
    if 'items' not in listing:
        g_logger.log_struct({"message": "No workouts to stop"},
                            severity=LOG_LEVELS.WARNING)
        return
    for instance in listing['items']:
        compute.instances().stop(
            project=project, zone=zone, instance=instance["name"]).execute()
    g_logger.log_struct({"message": "Workout stopped"},
                        severity=LOG_LEVELS.INFO)
Ejemplo n.º 6
0
def are_servers_deleted(build_id):
    """
    Poll the instance listing until no instance matches the filter ``name = <build_id>*``.

    At most 6 polls are made, with a 10 second sleep after every poll that still returns
    items.
    :param build_id: The build ID used as the instance name prefix in the filter
    :return: True as soon as a listing has no items, otherwise False after the last poll
    """
    for _ in range(6):
        listing = compute.instances().list(
            project=project, zone=zone, filter=f"name = {build_id}*").execute()
        if 'items' not in listing:
            return True
        time.sleep(10)
    return False
Ejemplo n.º 7
0
def stop_arena(unit_id):
    """
    Stop the servers built for an arena unit and the servers of each of its workouts.

    The unit and every workout are flagged as not running in Datastore, and a stop request
    is sent for each instance returned by the filter ``name = <id>*``. Progress is printed.
    :param unit_id: The build ID of the arena
    :return: None
    """
    # Unit-level servers come first
    unit_listing = compute.instances().list(
        project=project, zone=zone,
        filter='name = {}*'.format(unit_id)).execute()
    unit = ds_client.get(ds_client.key('cybergym-unit', unit_id))
    unit['arena']['running'] = False
    ds_client.put(unit)
    if 'items' not in unit_listing:
        print("No unit servers to stop")
    else:
        for instance in unit_listing['items']:
            compute.instances().stop(
                project=project, zone=zone,
                instance=instance["name"]).execute()
        print("Unit servers stopped")

    # Then the servers belonging to each workout of the unit
    for workout_id in unit['workouts']:
        workout_listing = compute.instances().list(
            project=project, zone=zone,
            filter='name = {}*'.format(workout_id)).execute()
        workout = ds_client.get(ds_client.key('cybergym-workout', workout_id))
        workout['running'] = False
        ds_client.put(workout)
        if 'items' not in workout_listing:
            print("No workout servers to stop for %s" % workout_id)
            continue
        for instance in workout_listing['items']:
            compute.instances().stop(
                project=project, zone=zone,
                instance=instance["name"]).execute()
        print("Workout servers stopped for %s" % workout_id)
Ejemplo n.º 8
0
def server_delete(server_name):
    """
    Delete a server instance and record the result on its Datastore entity.

    An HttpError from the delete request is treated as "already deleted". Otherwise the
    delete operation is waited on for up to 5 attempts; if every wait times out the server
    is marked BROKEN.
    :param server_name: The Datastore entity name (and instance name) of the server
    :return: True when the server ends in the DELETED state, False on timeout
    """
    server = ds_client.get(ds_client.key('cybergym-server', server_name))

    def mark_deleted():
        # Shared tail of both successful paths: record DELETED and let the build advance
        # once all of its servers are deleted.
        state_transition(entity=server, new_state=SERVER_STATES.DELETED)
        print(f"Finished deleting {server_name}")
        check_build_state_change(build_id=server['workout'],
                                 check_server_state=SERVER_STATES.DELETED,
                                 change_build_state=BUILD_STATES.COMPLETED_DELETING_SERVERS)
        return True

    state_transition(entity=server, new_state=SERVER_STATES.DELETING)
    workout_globals.refresh_api()
    try:
        operation = compute.instances().delete(
            project=project, zone=zone, instance=server_name).execute()
    except HttpError:
        # If the server is already deleted or no longer exists,
        return mark_deleted()

    print(f'Sent delete request to {server_name}, and waiting for response')
    finished = False
    for _ in range(5):
        try:
            print(f"Begin waiting for delete response from operation {operation['id']}")
            compute.zoneOperations().wait(
                project=project, zone=zone, operation=operation["id"]).execute()
        except timeout:
            print('Response timeout for deleting server. Trying again')
            continue
        finished = True
        break

    if not finished:
        print(f'Timeout in trying to delete server {server_name}')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    # If this is a student entry server, delete the DNS
    if 'student_entry' in server and server['student_entry']:
        print(f'Deleting DNS record for {server_name}')
        delete_dns(server['workout'], server['external_ip'])

    return mark_deleted()

# server_start('hxckdwxwld-nested')
# server_delete('oztfvquhhi-cybergym-publicprivate')
Ejemplo n.º 9
0
def get_server_ext_address(server_name):
    """
    Look up the external (NAT) IP address of a server. Right now, this is used for
    managing DNS entries.
    :param server_name: The server name in the cloud project
    :return: The natIP of the first access config on the first network interface, or
             False when a KeyError shows that no such address is present
    """
    try:
        instance = compute.instances().get(
            project=project, zone=zone, instance=server_name).execute()
        first_interface = instance['networkInterfaces'][0]
        external_ip = first_interface['accessConfigs'][0]['natIP']
    except KeyError:
        print('Server %s does not have an external IP address' % server_name)
        return False
    return external_ip
Ejemplo n.º 10
0
def server_stop(server_name):
    """
    Send a stop request for a server and mark its Datastore entity as STOPPING.

    The request is retried up to 5 times when it fails with BrokenPipeError, refreshing
    the API client before every attempt.
    :param server_name: The Datastore entity name (and instance name) of the server
    :return: True once the stop request has been sent, False if all attempts failed
    """
    server = ds_client.get(ds_client.key('cybergym-server', server_name))
    g_logger = log_client.logger(str(server_name))
    state_transition(entity=server, new_state=SERVER_STATES.STOPPING)

    for _ in range(5):
        workout_globals.refresh_api()
        try:
            compute.instances().stop(
                project=project, zone=zone, instance=server_name).execute()
            # The original message said "start" although this is the stop path
            g_logger.log_text(
                f'Sent job to stop {server_name}, and waiting for response')
            return True
        except BrokenPipeError:
            # Try again with a refreshed API client
            continue

    return False
Ejemplo n.º 11
0
def server_start(server_name):
    """
    Start the server described by the Datastore entity named server_name. When the entity
    is flagged as a student entry server, its DNS record is registered as well.
    :param server_name: The Datastore entity name of the server to start
    :return: True when the server reached RUNNING, False when waiting for the start
             operation timed out on all 5 attempts
    """
    server = ds_client.get(ds_client.key('cybergym-server', server_name))
    state_transition(entity=server, new_state=SERVER_STATES.STARTING)
    workout_globals.refresh_api()
    operation = compute.instances().start(
        project=project, zone=zone, instance=server_name).execute()
    print(f'Sent start request to {server_name}, and waiting for response')

    started = False
    for _ in range(5):
        try:
            print(f"Begin waiting for start response from operation {operation['id']}")
            compute.zoneOperations().wait(
                project=project, zone=zone, operation=operation["id"]).execute()
        except timeout:
            print('Response timeout for starting server. Trying again')
            continue
        started = True
        break

    if not started:
        print(f'Timeout in trying to start server {server_name}')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    # If this is the guacamole server for student entry, then register the new DNS
    if 'student_entry' in server and server['student_entry']:
        print(f'Setting DNS record for {server_name}')
        server['external_ip'] = register_student_entry(server['workout'], server_name)

    state_transition(entity=server, new_state=SERVER_STATES.RUNNING)
    print(f"Finished starting {server_name}")

    # If all servers have started, then change the build state
    check_build_state_change(build_id=server['workout'],
                             check_server_state=SERVER_STATES.RUNNING,
                             change_build_state=BUILD_STATES.RUNNING)
    return True
Ejemplo n.º 12
0
 def _wait_for_deletion(self, wait_type=ArenaWorkoutDeleteType.SERVER):
     """
     For asynchronous deletion, wait until all jobs have completed.

     Lists the resources of the requested type using the filter
     ``name = <self.build_id>*``. Up to 10 listings are made, with a 10 second sleep
     after every listing that still contains items.

     @param wait_type: designated type of resource
     @type wait_type: member of ArenaWorkoutDeleteType
     @return: True as soon as a listing has no items, False if items remain after the last listing
     @rtype: Boolean
     @raise ValueError: if wait_type is not one of the resource types handled here
     """
     name_filter = f"name = {self.build_id}*"
     for _ in range(10):
         if wait_type == ArenaWorkoutDeleteType.SERVER:
             request = compute.instances().list(
                 project=project, zone=zone, filter=name_filter)
         elif wait_type == ArenaWorkoutDeleteType.ROUTES:
             request = compute.routes().list(
                 project=project, filter=name_filter)
         elif wait_type == ArenaWorkoutDeleteType.FIREWALL_RULES:
             request = compute.firewalls().list(
                 project=project, filter=name_filter)
         elif wait_type == ArenaWorkoutDeleteType.NETWORK:
             request = compute.networks().list(
                 project=project, filter=name_filter)
         elif wait_type == ArenaWorkoutDeleteType.SUBNETWORK:
             request = compute.subnetworks().list(
                 project=project, region=region, filter=name_filter)
         else:
             # Without this branch an unhandled type left the listing result unbound
             # and failed with an unrelated UnboundLocalError.
             raise ValueError(f"Unsupported wait_type: {wait_type!r}")

         if 'items' not in request.execute():
             return True
         time.sleep(10)
     return False
Ejemplo n.º 13
0
def server_delete(server_name):
    """
    Delete a server instance (and its snapshot, when the entity has one) and record the
    result on its Datastore entity, logging progress to a logger named after the server.

    Returns early when Datastore has no entity with this name or already lists it as
    DELETED. An HttpError from the delete request is treated as "already deleted".
    Otherwise the delete operation is waited on for up to 5 attempts; if every wait times
    out the server is marked BROKEN.
    :param server_name: The Datastore entity name (and instance name) of the server
    :return: False when waiting for the delete operation timed out, True otherwise
    """
    g_logger = log_client.logger(str(server_name))
    name_matches = list(
        ds_client.query(kind='cybergym-server').add_filter(
            'name', '=', str(server_name)).fetch())
    deleted_matches = list(
        ds_client.query(kind='cybergym-server').add_filter(
            'name', '=', str(server_name)).add_filter('state', '=',
                                                      'DELETED').fetch())
    if deleted_matches and name_matches:
        g_logger.log_text('Server "' + server_name +
                          '" has already been deleted.')
        return True
    if not name_matches:
        g_logger.log_text('Server of name "' + server_name +
                          '" does not exist in datastore, unable to Delete.')
        return True

    server = ds_client.get(ds_client.key('cybergym-server', server_name))

    def mark_deleted():
        # Shared tail of both successful paths: record DELETED and let the build advance
        # once all of its servers are deleted.
        state_transition(entity=server, new_state=SERVER_STATES.DELETED)
        g_logger.log_text(f"Finished deleting {server_name}")
        check_build_state_change(
            build_id=server['workout'],
            check_server_state=SERVER_STATES.DELETED,
            change_build_state=BUILD_STATES.COMPLETED_DELETING_SERVERS)
        return True

    state_transition(entity=server, new_state=SERVER_STATES.DELETING)
    # If there are snapshots associated with this server, then delete the snapshots.
    if 'snapshot' in server and server['snapshot']:
        Snapshot.delete_snapshot(server_name)

    workout_globals.refresh_api()
    try:
        operation = compute.instances().delete(
            project=project, zone=zone, instance=server_name).execute()
    except HttpError:
        # If the server is already deleted or no longer exists,
        return mark_deleted()

    g_logger.log_text(
        f'Sent delete request to {server_name}, and waiting for response')
    finished = False
    for _ in range(5):
        try:
            g_logger.log_text(
                f"Begin waiting for delete response from operation {operation['id']}"
            )
            compute.zoneOperations().wait(
                project=project, zone=zone,
                operation=operation["id"]).execute()
        except timeout:
            g_logger.log_text(
                'Response timeout for deleting server. Trying again')
            continue
        finished = True
        break

    if not finished:
        g_logger.log_text(f'Timeout in trying to delete server {server_name}')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    # If this is a student entry server, delete the DNS
    if 'student_entry' in server and server['student_entry']:
        g_logger.log_text(f'Deleting DNS record for {server_name}')
        delete_dns(server['workout'], server['external_ip'])

    return mark_deleted()
Ejemplo n.º 14
0
def server_build(server_name):
    """
    Builds an individual server based on the specification in the Datastore entity with name server_name.

    The insert request is retried up to 5 times on BrokenPipeError, and the resulting
    operation is waited on for up to 5 attempts. After a successful build the server is
    registered with DNS when it is a student entry server, then stopped again.
    :param server_name: The Datastore entity name of the server to build
    :return: True when the server was built and stopped, False when the build request
             could not be sent or waiting for the build timed out
    """
    print(f'Building server {server_name}')
    server = ds_client.get(ds_client.key('cybergym-server', server_name))
    state_transition(entity=server, new_state=SERVER_STATES.BUILDING)

    # Commented because this is only for Fortinet right now.
    # if 'canIPForward' in server and server['config']['canIpForward']:
    #     image_config = {"name": server_name + "-disk", "sizeGb": 30,
    #                     "type": "projects/" + project + "/zones/" + zone + "/diskTypes/pd-ssd"}
    #     response = compute.disks().insert(project=project, zone=zone, body=image_config).execute()
    #     compute.zoneOperations().wait(project=project, zone=zone, operation=response["id"]).execute()

    # Begin the server build, retrying a bounded number of times on a broken pipe
    response = None
    attempts = 0
    while response is None and attempts < 5:
        workout_globals.refresh_api()
        try:
            response = compute.instances().insert(project=project, zone=zone, body=server['config']).execute()
            print(f'Sent job to build {server_name}, and waiting for response')
        except BrokenPipeError:
            attempts += 1

    if response is None:
        # Every insert attempt failed, so there is no operation to wait on. Previously this
        # fell through and crashed on the unbound response in the wait loop below.
        print(f'Unable to send build request for server {server_name}')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    success = False
    for _ in range(5):
        try:
            print(f"Begin waiting for build operation {response['id']}")
            compute.zoneOperations().wait(project=project, zone=zone, operation=response["id"]).execute()
        except timeout:
            print('Response timeout for build. Trying again')
            continue
        success = True
        break

    if not success:
        print(f'Timeout in trying to build server {server_name}')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    print(f'Successfully built server {server_name}')
    state_transition(entity=server, new_state=SERVER_STATES.RUNNING, existing_state=SERVER_STATES.BUILDING)

    # If this is a student entry server, register the DNS
    if 'student_entry' in server and server['student_entry']:
        print(f'Setting DNS record for {server_name}')
        ip_address = register_student_entry(server['workout'], server_name)
        server['external_ip'] = ip_address
        ds_client.put(server)
        # Re-read the entity after saving the external IP
        server = ds_client.get(ds_client.key('cybergym-server', server_name))

    # Now stop the server before completing
    print(f'Stopping {server_name}')
    compute.instances().stop(project=project, zone=zone, instance=server_name).execute()
    state_transition(entity=server, new_state=SERVER_STATES.STOPPED)

    # If no other servers are building, then set the workout to the state of READY.
    build_id = server['workout']
    check_build_state_change(build_id=build_id, check_server_state=SERVER_STATES.STOPPED,
                             change_build_state=BUILD_STATES.READY)
    # The docstring promises a boolean status; the success path used to return None.
    return True
Ejemplo n.º 15
0
def _medic_is_local_workout(workout):
    """Return True when the workout is built in this project and has type WORKOUT_TYPES.WORKOUT."""
    if workout.get('build_project_location', project) != project:
        return False
    return get_workout_type(workout) == WORKOUT_TYPES.WORKOUT


def _medic_is_stale(workout, max_age_seconds):
    """Return True when the workout's 'state-timestamp' is older than max_age_seconds."""
    # NOTE(review): this is a string comparison, so it presumably relies on the stored
    # timestamp being an epoch-seconds string with the same number of digits - confirm.
    cutoff = str(calendar.timegm(time.gmtime()) - max_age_seconds)
    return workout['state-timestamp'] < cutoff


def _medic_workouts_in_state(state):
    """Fetch all 'cybergym-workout' entities whose 'state' equals the given state."""
    query = ds_client.query(kind='cybergym-workout')
    return list(query.add_filter("state", "=", state).fetch())


def _medic_force_stop(g_logger, workout, message):
    """Log the message for this workout, mark it RUNNING and then stop it."""
    g_logger.log_text(message.format(workout.key.name))
    # presumably required because stop_workout transitions from RUNNING to READY - confirm
    state_transition(workout, new_state=BUILD_STATES.RUNNING)
    stop_workout(workout.key.name)


def _medic_stop_stale_workouts(g_logger, state, max_age_seconds, message):
    """Force-stop every local workout that has been in the given state for too long."""
    for workout in _medic_workouts_in_state(state):
        if _medic_is_local_workout(workout) and _medic_is_stale(workout, max_age_seconds):
            _medic_force_stop(g_logger, workout, message)


def medic():
    """
    Reviews the state of all active workouts in the project and attempts to correct any which may have an invalid
    state. Invalid states often occur due to timeouts in processing the Google Cloud Functions.

    Each kind of stuck state is queried and repaired in turn, and finally a BUILD action is
    published for every server of a READY workout that has no matching compute instance.
    :returns: None
    """
    g_logger = log_client.logger('workout-actions')
    g_logger.log_text("MEDIC: Running Medic function")
    #
    # Fixing build timeout issues
    #
    # The add_filter does not have a != operator. This provides an equivalent results for active workouts.
    query_current_workouts = ds_client.query(kind='cybergym-workout')
    for workout in list(query_current_workouts.add_filter('active', '=', True).fetch()):
        if not _medic_is_local_workout(workout):
            continue
        if 'state' in workout:
            build_state = workout['state']
            # if the workout state has not completed, then attempt to continue rebuilding the workout from where
            # it left off.
            if build_state in ordered_workout_build_states:
                g_logger.log_text(
                    "MEDIC: Workout {} is in a build state of {}. Attempting to fix..."
                    .format(workout.key.name, build_state))
                build_workout(workout_id=workout.key.name)
        elif isinstance(workout, datastore.entity.Entity):
            # If there is no state, then this is not a valid workout, and we can delete the Datastore entity.
            g_logger.log_text(
                "Invalid workout specification in the datastore for workout ID: {}. Deleting the record."
                .format(workout.key.name))
            ds_client.delete(workout.key)

    #
    # Fixing workouts in state COMPLETED_FIREWALL. This may occur when the firewall gets built after the guacamole server
    # Only acted on if the last state change occurred over 5 minutes ago.
    #
    _medic_stop_stale_workouts(
        g_logger, BUILD_STATES.COMPLETED_FIREWALL, 300,
        "MEDIC: Workout {} stuck in firewall completion. Changing state to READY")

    #
    # Fixing workouts in state GUACAMOLE_SERVER_TIMEOUT. This may occur waiting for the guacamole server to come up
    #
    for workout in _medic_workouts_in_state(BUILD_STATES.GUACAMOLE_SERVER_LOAD_TIMEOUT):
        if not _medic_is_local_workout(workout):
            continue
        if _medic_is_stale(workout, 900):
            # State change is over 15 minutes old: mark RUNNING and stop the workout
            _medic_force_stop(
                g_logger, workout,
                "MEDIC: Workout {} stuck in guacamole timeout. Changing state to READY")
        else:
            # More recent state change: only move the workout to READY
            g_logger.log_text(
                "MEDIC: Workout {} stuck in guacamole timeout. Changing state to READY"
                .format(workout.key.name))
            print(
                f"Workout {workout.key.name} stuck in guacamole timeout. Changing state to READY"
            )
            state_transition(workout, new_state=BUILD_STATES.READY)

    #
    # Fixing workouts in the state of STARTING. This may occur after a timeout in starting workouts.
    # Only acted on if the last state change occurred over 5 minutes ago.
    #
    _medic_stop_stale_workouts(
        g_logger, BUILD_STATES.STARTING, 300,
        "MEDIC: Workout {} stuck in a STARTING state. Stopping the workout.")

    #
    # Fixing workouts in the state of STOPPING. This may occur after a timeout in stopping workouts.
    # Only acted on if the last state change occurred over 5 minutes ago.
    # (The log message used to say STARTING here, copied from the section above.)
    #
    _medic_stop_stale_workouts(
        g_logger, BUILD_STATES.STOPPING, 300,
        "MEDIC: Workout {} stuck in a STOPPING state. Stopping the workout.")

    #
    # Fixing workouts in the state of NUKING. This may occur after a timeout in deleting the workouts.
    #
    for workout in _medic_workouts_in_state(BUILD_STATES.NUKING):
        # Only retry if the last state change occurred over 5 minutes ago.
        if _medic_is_local_workout(workout) and _medic_is_stale(workout, 300):
            g_logger.log_text(
                "MEDIC: Workout {} stuck in a NUKING state. Attempting to nuke again."
                .format(workout.key.name))
            nuke_workout(workout.key.name)

    #
    # Fixing machines that did not get built
    #
    query_rebuild = ds_client.query(kind='cybergym-workout')
    query_rebuild.add_filter('state', '=', BUILD_STATES.READY)
    query_rebuild.add_filter('build_project_location', '=', project)
    ready_workouts = list(query_rebuild.fetch())

    # Collect the names of all existing instances, following nextPageToken across pages.
    # A set keeps the membership checks below constant-time.
    existing_names = set()
    page = compute.instances().list(project=project, zone=zone).execute()
    page_items = page.get('items', None)
    while page_items:
        existing_names.update(instance['name'] for instance in page_items)
        if 'nextPageToken' not in page:
            break
        page = compute.instances().list(
            project=project,
            zone=zone,
            pageToken=page['nextPageToken']).execute()
        page_items = page.get('items', None)

    list_missing = []
    for workout in ready_workouts:
        unit = ds_client.get(ds_client.key('cybergym-unit', workout['unit_id']))
        # Arena units keep their servers under 'student_servers', compute units under 'servers'
        if unit['build_type'] == 'arena':
            server_specs = workout['student_servers']
        elif unit['build_type'] == 'compute':
            server_specs = workout['servers']
        else:
            continue
        for server in server_specs:
            datastore_server_name = workout.key.name + '-' + server['name']
            if datastore_server_name not in existing_names:
                list_missing.append(datastore_server_name)

    cloud_log('Medic', f'Missing servers{list_missing}', LOG_LEVELS.INFO)

    if list_missing:
        # One publisher is enough for all rebuild requests
        publisher = pubsub_v1.PublisherClient()
        topic_path = publisher.topic_path(project, PUBSUB_TOPICS.MANAGE_SERVER)
        for server in list_missing:
            cloud_log('Medic', 'Rebuilding server{}'.format(server),
                      LOG_LEVELS.INFO)
            publisher.publish(topic_path,
                              data=b'Server Build',
                              server_name=server,
                              action=SERVER_ACTIONS.BUILD)
Ejemplo n.º 16
0
def server_build(server_name):
    """
    Builds an individual server based on the specification in the Datastore entity with name server_name.

    The instance insert request is retried up to 5 times on BrokenPipeError, and the wait on the resulting
    build operation is retried up to 5 times on timeout. Once built, a student entry server has its DNS
    record registered, and the server is then stopped.
    :param server_name: The Datastore entity name of the server to build
    :return: True when the server was built and stopped; False when the build request could not be sent,
        was rejected by the API, or the build operation timed out
    """
    max_attempts = 5
    server = ds_client.get(ds_client.key('cybergym-server', server_name))
    build_id = server['workout']
    g_logger = log_client.logger(str(server_name))
    state_transition(entity=server, new_state=SERVER_STATES.BUILDING)
    config = server['config'].copy()
    # Currently, we need a workaround to insert the guacamole startup script because of a 1500 character
    # limit on indexed fields. The exclude_from_index does not work on embedded datastore fields
    if server.get('student_entry'):
        config['metadata'] = {
            'items': [{
                "key": "startup-script",
                "value": server['guacamole_startup_script']
            }]
        }

    # Begin the server build and keep retrying the request for a bounded number of attempts
    response = None
    build_success = False
    attempts = 0
    while not build_success and attempts < max_attempts:
        workout_globals.refresh_api()
        try:
            if server.get('add_disk'):
                try:
                    disk_config = {
                        "name": server_name + "-disk",
                        "sizeGb": server['add_disk'],
                        "type": "projects/" + project + "/zones/" + zone +
                                "/diskTypes/pd-ssd"
                    }
                    # Keep the disk operation separate from the instance operation waited on below
                    disk_response = compute.disks().insert(
                        project=project, zone=zone,
                        body=disk_config).execute()
                    compute.zoneOperations().wait(
                        project=project, zone=zone,
                        operation=disk_response["id"]).execute()
                except HttpError as err:
                    # If the disk already exists (i.e. a nuke), then ignore. Any other failure is
                    # re-raised so the outer HttpError handler logs it and aborts the build.
                    if err.resp.status != 409:
                        raise
            if server['build_type'] == BUILD_TYPES.MACHINE_IMAGE:
                source_machine_image = f"projects/{project}/global/machineImages/{server['machine_image']}"
                compute_beta = discovery.build('compute', 'beta')
                response = compute_beta.instances().insert(
                    project=project,
                    zone=zone,
                    body=config,
                    sourceMachineImage=source_machine_image).execute()
            else:
                if server.get("delayed_start"):
                    time.sleep(30)
                response = compute.instances().insert(project=project,
                                                      zone=zone,
                                                      body=config).execute()
            build_success = True
            g_logger.log_text(
                f'Sent job to build {server_name}, and waiting for response')
        except BrokenPipeError:
            attempts += 1
        except HttpError as exception:
            cloud_log(
                build_id,
                f"Error when trying to build {server_name}: {exception.reason}",
                LOG_LEVELS.ERROR)
            return False

    # Every attempt hit a broken pipe: there is no instance operation to wait on, so fail here
    # rather than waiting on a missing (or unrelated disk) operation.
    if not build_success:
        g_logger.log_text(
            f'Unable to send the build request for {server_name} after {max_attempts} attempts')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    operation_id = response["id"]
    attempts = 0
    success = False
    while not success and attempts < max_attempts:
        try:
            g_logger.log_text(
                f"Begin waiting for build operation {operation_id}")
            compute.zoneOperations().wait(project=project,
                                          zone=zone,
                                          operation=operation_id).execute()
            success = True
        except timeout:
            attempts += 1
            g_logger.log_text('Response timeout for build. Trying again')

    if not success:
        g_logger.log_text(f'Timeout in trying to build server {server_name}')
        state_transition(entity=server, new_state=SERVER_STATES.BROKEN)
        return False

    g_logger.log_text(f'Successfully built server {server_name}')
    state_transition(entity=server,
                     new_state=SERVER_STATES.RUNNING,
                     existing_state=SERVER_STATES.BUILDING)

    # If this is a student entry server, register the DNS
    if server.get('student_entry'):
        g_logger.log_text(f'Setting DNS record for {server_name}')
        ip_address = register_student_entry(server['workout'], server_name)
        server['external_ip'] = ip_address
        ds_client.put(server)
        # Re-read the entity after the put so the following state transition starts from the stored copy
        server = ds_client.get(ds_client.key('cybergym-server', server_name))

    # Now stop the server before completing
    g_logger.log_text(f'Stopping {server_name}')
    compute.instances().stop(project=project, zone=zone,
                             instance=server_name).execute()
    state_transition(entity=server, new_state=SERVER_STATES.STOPPED)

    # If no other servers are building, then set the workout to the state of READY.
    check_build_state_change(build_id=build_id,
                             check_server_state=SERVER_STATES.STOPPED,
                             change_build_state=BUILD_STATES.READY)
    return True
# Example no. 17
# 0
def build_workout(workout_id):
    """
    Builds a workout compute environment according to the specification referenced in the datastore with key
    workout_id.

    The build runs as a sequence of stages (networks, servers, student entry, routes, firewall rules). Each
    stage is entered only when check_ordered_workout_state() allows it for the workout's current state.
    :param workout_id: The workout_id key in the datastore holding the build specification
    :return: None
    """
    key = ds_client.key('cybergym-workout', workout_id)
    workout = ds_client.get(key)
    # This can sometimes happen when debugging a workout ID and the Datastore record no longer exists.
    if not workout:
        print('No workout for %s exists in the data store' % workout_id)
        return

    if not workout.get('state'):
        state_transition(entity=workout, new_state=BUILD_STATES.START)

    # Parse the assessment specification to obtain any startup scripts for the workout.
    startup_scripts = None
    if workout.get('assessment'):
        startup_scripts = get_startup_scripts(workout_id=workout_id,
                                              assessment=workout['assessment'])

    # Create the networks and subnets
    if check_ordered_workout_state(workout, BUILD_STATES.BUILDING_NETWORKS):
        state_transition(entity=workout,
                         new_state=BUILD_STATES.BUILDING_NETWORKS)
        print('Creating networks')
        for network in workout['networks']:
            network_name = "%s-%s" % (workout_id, network['name'])
            network_body = {
                "name": network_name,
                "autoCreateSubnetworks": False,
                "region": region
            }
            response = compute.networks().insert(project=project,
                                                 body=network_body).execute()
            compute.globalOperations().wait(
                project=project, operation=response["id"]).execute()
            time.sleep(10)
            for subnet in network['subnets']:
                subnetwork_body = {
                    "name": "%s-%s" % (network_name, subnet['name']),
                    "network": "projects/%s/global/networks/%s" % (project, network_name),
                    "ipCidrRange": subnet['ip_subnet']
                }
                response = compute.subnetworks().insert(
                    project=project, region=region,
                    body=subnetwork_body).execute()
                compute.regionOperations().wait(
                    project=project, region=region,
                    operation=response["id"]).execute()
        # Mark the stage complete only once every network and subnet has been created
        state_transition(entity=workout,
                         new_state=BUILD_STATES.COMPLETED_NETWORKS)

    # Now create the server configurations
    if check_ordered_workout_state(workout, BUILD_STATES.BUILDING_SERVERS):
        state_transition(entity=workout,
                         new_state=BUILD_STATES.BUILDING_SERVERS)
        print('Creating servers')
        for server in workout['servers']:
            server_name = "%s-%s" % (workout_id, server['name'])
            nics = []
            for n in server['nics']:
                nic = {
                    "network": f"{workout_id}-{n['network']}",
                    "internal_IP": n['internal_IP'],
                    "subnet": f"{workout_id}-{n['network']}-{n['subnet']}",
                    "external_NAT": n['external_NAT']
                }
                # Nested VMs are sometimes used for vulnerable servers. This adds those specified IP addresses as
                # aliases to the NIC
                if n.get('IP_aliases'):
                    nic['aliasIpRanges'] = [
                        {"ipCidrRange": ipaddr} for ipaddr in n['IP_aliases']
                    ]
                nics.append(nic)
            # Add the startup script for assessment as metadata if it exists
            meta_data = None
            if startup_scripts and server['name'] in startup_scripts:
                meta_data = startup_scripts[server['name']]

            create_instance_custom_image(compute=compute,
                                         workout=workout_id,
                                         name=server_name,
                                         custom_image=server['image'],
                                         machine_type=server["machine_type"],
                                         networkRouting=server["network_routing"],
                                         networks=nics,
                                         tags=server['tags'],
                                         meta_data=meta_data,
                                         sshkey=server["sshkey"],
                                         minCpuPlatform=server.get("minCpuPlatform"))

        state_transition(entity=workout,
                         new_state=BUILD_STATES.COMPLETED_SERVERS)

    # Create the student entry guacamole server
    if check_ordered_workout_state(workout,
                                   BUILD_STATES.BUILDING_STUDENT_ENTRY):
        state_transition(entity=workout,
                         new_state=BUILD_STATES.BUILDING_STUDENT_ENTRY)
        student_entry = workout.get('student_entry')
        if not student_entry:
            # Without a student entry specification the workout is marked broken and the build stops
            state_transition(entity=workout, new_state=BUILD_STATES.BROKEN)
            return
        guac_connection = [{
            'workout_id': workout_id,
            'entry_type': student_entry['type'],
            'ip': student_entry['ip'],
            'username': student_entry.get('username'),
            'password': student_entry['password'],
            'security-mode': student_entry.get('security-mode', 'nla')
        }]
        build_guacamole_server(build=workout,
                               network=f"{workout_id}-{student_entry['network']}",
                               guacamole_connections=guac_connection)
        # Get the workout key again or the state transition will overwrite it
        workout = ds_client.get(
            ds_client.key('cybergym-workout', workout_id))
        state_transition(entity=workout,
                         new_state=BUILD_STATES.COMPLETED_STUDENT_ENTRY)

    # Create all of the network routes and firewall rules
    if check_ordered_workout_state(workout, BUILD_STATES.BUILDING_ROUTES):
        state_transition(entity=workout,
                         new_state=BUILD_STATES.BUILDING_ROUTES)
        print('Creating network routes and firewall rules')
        for route in workout.get('routes') or []:
            create_route({
                "name": "%s-%s" % (workout_id, route['name']),
                "network": "%s-%s" % (workout_id, route['network']),
                "destRange": route['dest_range'],
                "nextHopInstance": "%s-%s" % (workout_id, route['next_hop_instance'])
            })

    if check_ordered_workout_state(workout, BUILD_STATES.BUILDING_FIREWALL):
        state_transition(entity=workout,
                         new_state=BUILD_STATES.BUILDING_FIREWALL)
        firewall_rules = [
            {
                "name": "%s-%s" % (workout_id, rule['name']),
                "network": "%s-%s" % (workout_id, rule['network']),
                "targetTags": rule['target_tags'],
                "protocol": rule['protocol'],
                "ports": rule['ports'],
                "sourceRanges": rule['source_ranges']
            }
            for rule in workout['firewall_rules']
        ]

        create_firewall_rules(firewall_rules)

# build_workout('isirdhzjqk')