def run(run_once=False):
    """
    Keep the database synchronised with the GNS3 server.

    The GNS3 server is first probed through its ``/v2/version`` endpoint.
    After that ``sync_projects_to_db()`` is called in a loop with a 10 second
    pause between rounds.

    :param run_once: When true, perform a single synchronisation round and
        return instead of looping forever.

    All exceptions are reported on stdout and swallowed; the function returns
    normally instead of raising.
    """
    try:
        # Test reachability
        data = session.get(gns3_base_url + '/v2/version').json()
        print_notice(
            _("Connected to {hostname}:{port} (GNS v{version})").format(
                hostname=settings.GNS3['HOST'],
                port=settings.GNS3['PORT'],
                version=data['version']))

        # Run
        while True:
            sync_projects_to_db()
            if run_once:
                return

            time.sleep(10)
    except RequestException as e:
        # Translate the template first and interpolate afterwards. Formatting
        # inside _() would look up the already-interpolated text, which never
        # matches a catalogue entry.
        print(_("Cannot connect to GNS3 server: {}").format(e))
        if not run_once:
            # Don't exit too quickly, otherwise uwsgi loops too fast
            time.sleep(60)
    except Exception as e:
        print(_('Unexpected error: {}').format(e))
def analyse_instancerunresult(pk):
    """
    Score one InstanceRunResult against the baseline of its InstanceRun.

    The result is compared to every baseline candidate returned by
    ``result.instancerun.get_baseline()``. The candidate with the highest
    image similarity is used, and the resource score is derived from that
    same candidate. Results that are already analysed are left untouched.
    When there is no baseline all three scores are stored as 0.

    :param pk: Primary key of the InstanceRunResult to analyse.
    :raises RetryTaskException: Re-raised as-is when the lookup asks for a
        retry, and raised for any unexpected error after it has been logged.
    """
    from measurements.models import InstanceRunResult
    from measurements.utils import compare_base64_images

    try:
        result = retry_get(InstanceRunResult.objects.select_for_update(), pk=pk)
        if result.analysed:
            return

        print_notice(
            _("Analysing InstanceRunResult {result.pk} ({result.instance_type}: {result.instancerun.url}) "
              "on {result.instancerun.trillian.name}").format(result=result))

        baseline = result.instancerun.get_baseline()
        if not baseline:
            # Nothing to compare against: record the lowest possible scores
            result.image_score = 0
            result.resource_score = 0
            result.overall_score = 0
            result.analysed = timezone.now()
            result.save()
            return

        # If we have multiple possible combinations then test them all and choose the most positive one.
        # Select on the score only: comparing whole (score, base) tuples falls back to comparing the
        # baseline objects themselves when two scores tie, which raises TypeError for objects that
        # define no ordering.
        result.image_score, base = max(
            ((compare_base64_images(candidate.web_response['image'], result.web_response['image']), candidate)
             for candidate in baseline),
            key=lambda scored: scored[0])

        # Analyse the resources
        base_stats = get_resource_stats(base.web_response['resources'])
        my_stats = get_resource_stats(result.web_response['resources'])
        # "or 1" avoids dividing by zero when the baseline loaded no resource successfully;
        # the ratio is capped at 1.0
        result.resource_score = min(
            1.0, my_stats['total']['ok'] / (base_stats['total']['ok'] or 1))

        # Determine the overall score
        result.overall_score = result.image_score * result.resource_score
        result.analysed = timezone.now()
        result.save()

    except RetryTaskException:
        raise
    except InstanceRunResult.DoesNotExist:
        print_warning(
            _("InstanceRunResult {pk} does not exist anymore").format(pk=pk))
        return
    except Exception as ex:
        print_error(
            _('{name} on line {line}: {msg}').format(
                name=type(ex).__name__,
                line=sys.exc_info()[-1].tb_lineno,
                msg=ex))
        raise RetryTaskException
def analyse_instancerun(pk):
    """
    Aggregate the scores of all InstanceRunResults into their InstanceRun.

    Nothing happens while ``retry_all`` reports that the child results are
    not all analysed, or when the run is already analysed or not finished.
    Otherwise the image, resource and overall score of the run become the
    mean of the corresponding scores of its results.

    :param pk: Primary key of the InstanceRun to analyse.
    :raises RetryTaskException: Re-raised as-is, and raised for any
        unexpected error after it has been logged.
    """
    from measurements.models import InstanceRun, InstanceRunResult

    score_fields = ('image_score', 'resource_score', 'overall_score')

    try:
        analysed_flags = InstanceRunResult.objects.filter(
            instancerun_id=pk).values_list('analysed', flat=True)
        if not retry_all(analysed_flags):
            return

        run = InstanceRun.objects.select_for_update().get(pk=pk)
        if run.analysed or not run.finished:
            return

        print_notice(
            _("Analysing InstanceRun {run.pk} ({run.url}) on {run.trillian.name}"
              ).format(run=run))

        # One row per result, columns in the order of score_fields
        rows = InstanceRunResult.objects.filter(
            instancerun_id=pk).values_list(*score_fields)
        for column, field in enumerate(score_fields):
            setattr(run, field, mean([row[column] for row in rows]))

        run.analysed = timezone.now()
        run.save()

    except RetryTaskException:
        raise
    except InstanceRun.DoesNotExist:
        print_warning(
            _("InstanceRun {pk} does not exist anymore").format(pk=pk))
        return
    except Exception as ex:
        failing_line = sys.exc_info()[-1].tb_lineno
        print_error(
            _('{name} on line {line}: {msg}').format(
                name=type(ex).__name__, line=failing_line, msg=ex))
        raise RetryTaskException
def execute_instancerun(pk):
    """
    Execute the measurements for one InstanceRun and store the raw results.

    Steps, in order:

    1. Lock the InstanceRun and mark it as started. Runs that already
       started, or are requested for a later time, are skipped.
    2. Resolve the hostname of ``run.url`` and store the addresses.
    3. Ask the 'dual-stack' Marvin to browse the URL to obtain a baseline.
    4. Select the instance types to test based on the address families that
       were found, and fire browse and ping requests at all selected Marvins
       in parallel.
    5. When every response has status 200, store one InstanceRunResult per
       instance type and mark the run as finished.

    :param pk: Primary key of the InstanceRun to execute.
    :raises RetryTaskException: When the baseline or any test request fails,
        or on any unexpected error. Before raising, the started/finished
        timestamps are cleared and the run's messages deleted so that the run
        can be retried.
    """
    from measurements.models import InstanceRun, InstanceRunResult

    current_task = get_current_task()

    try:
        # Make sure we need to start and we don't start twice
        with transaction.atomic():
            run = retry_get(InstanceRun.objects.select_for_update(), pk=pk)
            if run.started:
                print_notice(
                    _('InstanceRun {pk} has already started, skipping').format(
                        pk=pk))
                return

            now = timezone.now()
            if run.requested > now:
                print_notice(
                    _('InstanceRun {pk} is requested to start in the future, skipping'
                      ).format(pk=pk))
                return

            # We are starting!
            run.started = now
            run.save()

        # Log which instancerun we're working on
        print_message(
            _("Start working on InstanceRun {run.pk} ({run.url})").format(
                run=run))

        # Do a simple DNS lookup
        addresses = set()
        for info in socket.getaddrinfo(urlparse(run.url).hostname,
                                       port=80,
                                       proto=socket.IPPROTO_TCP):
            family, socktype, proto, canonname, sockaddr = info
            addresses.add(ipaddress.ip_address(sockaddr[0]))

        # Only set on the instance here; it is written to the database by the
        # run.save() at the end of a successful run
        run.dns_results = list([str(address) for address in addresses])

        # First determine a baseline
        marvin = get_marvins(['dual-stack'], current_task)['dual-stack']
        with marvin:
            response = requests.request(method='POST',
                                        url='http://{}:3001/browse'.format(
                                            marvin.name),
                                        json={
                                            'url': run.url,
                                        },
                                        timeout=(5, 65))
            if response.status_code != 200:
                # Retry after a random delay between 5 and 119 seconds
                timeout = randrange(5, 120)
                print_error(
                    _("Baseline test failed, retrying in {timeout} seconds").
                    format(timeout=timeout))
                raise RetryTaskException(timeout=timeout)

            baseline = response.json()

        # Determine which protocols to check
        site_v4_addresses = [
            address for address in addresses if address.version == 4
        ]
        site_v6_addresses = [
            address for address in addresses if address.version == 6
        ]

        # nat64 and dual-stack are always tested; the single-protocol tests
        # only when the site has an address of that family
        instance_types = {'nat64', 'dual-stack'}
        if site_v4_addresses:
            instance_types.add('v4only')
        else:
            InstanceRunMessage.objects.create(
                instancerun=run,
                severity=logging.WARNING,
                message=gettext_noop(
                    'This website has no IPv4 addresses so the IPv4-only test is skipped'
                ),
            )

        if site_v6_addresses:
            instance_types.add('v6only')
        else:
            InstanceRunMessage.objects.create(
                instancerun=run,
                severity=logging.WARNING,
                message=gettext_noop(
                    'This website has no IPv6 addresses so the IPv6-only test is skipped'
                ),
            )

        marvins = get_marvins(instance_types, current_task)

        with FuturesSession(executor=ThreadPoolExecutor(
                max_workers=2 * len(marvins))) as session:
            with ExitStack() as stack:
                # Signal usage of Marvins
                for marvin in marvins.values():
                    stack.enter_context(marvin)

                # Start requests
                # instance_type -> future of the browse request
                browse_requests = {}
                for instance_type, marvin in marvins.items():
                    browse_requests[instance_type] = session.request(
                        method='POST',
                        url='http://{}:3001/browse'.format(marvin.name),
                        json={
                            'url': run.url,
                            'timeout': 30,
                        },
                        timeout=(5, 65))

                # instance_type -> {address string -> future of the ping request}
                ping_requests = {}
                for instance_type, marvin in marvins.items():
                    marvin_has_v4 = instance_type in ('v4only', 'dual-stack')
                    marvin_has_nat64 = instance_type in ('nat64', )
                    marvin_has_v6 = instance_type in ('v6only', 'dual-stack',
                                                      'nat64')

                    if marvin_has_v4:
                        for address in site_v4_addresses:
                            address_str = str(address)
                            ping_requests.setdefault(
                                instance_type,
                                {})[address_str] = session.request(
                                    method='POST',
                                    url='http://{}:3001/ping4'.format(
                                        marvin.name),
                                    json={'target': address_str},
                                    timeout=(5, 65))

                    if marvin_has_nat64:
                        for address in site_v4_addresses:
                            # Ping the IPv4 address through NAT64: its integer
                            # value is added to the 64:ff9b:: prefix
                            address_str = str(
                                IPv6Address('64:ff9b::') + int(address))
                            ping_requests.setdefault(
                                instance_type,
                                {})[address_str] = session.request(
                                    method='POST',
                                    url='http://{}:3001/ping6'.format(
                                        marvin.name),
                                    json={'target': address_str},
                                    timeout=(5, 65))

                    if marvin_has_v6:
                        for address in site_v6_addresses:
                            address_str = str(address)
                            ping_requests.setdefault(
                                instance_type,
                                {})[address_str] = session.request(
                                    method='POST',
                                    url='http://{}:3001/ping6'.format(
                                        marvin.name),
                                    json={'target': address_str},
                                    timeout=(5, 65))

                # Wait for all the responses to come back in
                browse_responses = {}
                for instance_type, request in browse_requests.items():
                    browse_responses[instance_type] = request.result()

                # Keyed by (instance_type, address)
                ping_responses = {}
                for instance_type, address_requests in ping_requests.items():
                    for address, request in address_requests.items():
                        ping_responses[(instance_type,
                                        address)] = request.result()

        # Log every response that is not a success
        for req, response in list(browse_responses.items()) + list(
                ping_responses.items()):
            if response.status_code >= 300:
                print_error("{req} {url} ({code}): {json}".format(
                    code=response.status_code,
                    req=req,
                    url=response.url,
                    json=response.json()))

        # Check if all tests succeeded
        if not all([
                response.status_code == 200
                for response in list(browse_responses.values()) +
                list(ping_responses.values())
        ]):
            timeout = randrange(5, 120)
            print_error(
                _("Not all tests completed successfully, retrying in {timeout} seconds"
                  ).format(timeout=timeout))
            raise RetryTaskException(timeout=timeout)

        # Parse all JSON
        ping_responses_json = {}
        for (instance_type, address), response in ping_responses.items():
            ping_responses_json.setdefault(instance_type, {})
            ping_responses_json[instance_type][address] = response.json(
                object_pairs_hook=OrderedDict)

        browse_responses_json = {
            instance_type: response.json(object_pairs_hook=OrderedDict)
            for instance_type, response in browse_responses.items()
        }

        # Compare dual-stack to the baseline
        if len(baseline['resources']) != len(browse_responses_json['dual-stack']['resources']) or \
                compare_base64_images(baseline['image'], browse_responses_json['dual-stack']['image']) < 0.98:
            InstanceRunMessage.objects.create(
                instancerun=run,
                severity=logging.WARNING,
                message=gettext_noop(
                    'Two identical requests returned different results. '
                    'Results are going to be unpredictable.'),
            )

        # Store the raw responses, one result per tested instance type
        for instance_type in instance_types:
            InstanceRunResult.objects.update_or_create(
                defaults={
                    'ping_response': ping_responses_json[instance_type],
                    'web_response': browse_responses_json[instance_type],
                },
                instancerun=run,
                marvin=marvins[instance_type],
            )

        # We are done!
        run.finished = timezone.now()
        run.save()

        print_message(
            _("Work on InstanceRun {run.pk} ({run.url}) completed").format(
                run=run))

    except RetryTaskException:
        # Clear the started timestamp and messages so it can be retried, and trigger retry
        InstanceRun.objects.filter(pk=pk).update(started=None, finished=None)
        InstanceRunMessage.objects.filter(instancerun_id=pk).delete()
        raise
    except InstanceRun.DoesNotExist:
        print_warning(
            _("InstanceRun {pk} does not exist anymore").format(pk=pk))
        return
    except Exception as ex:
        print_error(
            _('{name} on line {line}: {msg}').format(
                name=type(ex).__name__,
                line=sys.exc_info()[-1].tb_lineno,
                msg=ex))
        print_error(format_exc())

        # Clear the started timestamp and messages so it can be retried, and trigger retry
        InstanceRun.objects.filter(pk=pk).update(started=None, finished=None)
        InstanceRunMessage.objects.filter(instancerun_id=pk).delete()
        raise RetryTaskException
def run():
    """
    Relay terminal traffic between the Redis event channels and telnet consoles.

    The function subscribes to the ``server:*/events`` channels and waits
    with ``select`` on the Redis connection and on every open telnet session.

    * A ``terminal-input`` message from Redis is written to the telnet
      session of the addressed (exercise, node). The session is opened on
      first use, and only for nodes whose template is a WorkNode or IRRNode
      and whose GNS3 console type is telnet.
    * Data read from a telnet session is published base64-encoded as a
      ``terminal-output`` message on the ``<exercise_id>/events`` facility.

    Unexpected exceptions end the loop; they are logged and the function
    returns.
    """
    print_notice("Starting telnet relay")
    try:
        redis = StrictRedis(**private_settings.WS4REDIS_CONNECTION)
        subscriber = redis.pubsub()
        subscriber.psubscribe('server:*/events')

        # noinspection PyProtectedMember
        redis_fd = subscriber.connection._sock.fileno()

        # Everything select() watches: the Redis descriptor plus the open telnet sessions
        sockets = [redis_fd]
        # (exercise_id, node_id) -> telnet session, or None for nodes that must be ignored
        sessions = {}
        while sockets:
            readable = select.select(sockets, [], [])[0]
            for s in readable:
                if s is redis_fd:
                    # Data from websocket to telnet
                    message = subscriber.parse_response()
                    msg_type = message.pop(0)
                    if msg_type != b'pmessage':
                        continue

                    pattern, channel, message = message
                    match = channel_pattern.match(channel)
                    if not match:
                        print_warning(
                            _("Malformed channel name: {}").format(channel))
                        continue

                    exercise_id = int(match.group(1))
                    data = json.loads(message)
                    if 'type' not in data or 'node_id' not in data:
                        print_warning(
                            _("Malformed terminal input: {}").format(message))
                        continue

                    if data['type'] != 'terminal-input':
                        # Not for us
                        continue

                    node_id = int(data['node_id'])
                    key = (exercise_id, node_id)
                    if key in sessions:
                        session = sessions[key]
                    else:
                        nodes = list(
                            ExerciseNode.objects.filter(
                                project_id=exercise_id,
                                id=node_id).select_related('project'))
                        if not nodes or not isinstance(nodes[0].template_node,
                                                       (WorkNode, IRRNode)):
                            print_warning(
                                _("Invalid node-id {node_id} provided for exercise {exercise_id}"
                                  ).format(node_id=node_id,
                                           exercise_id=exercise_id))
                            # Skip this message. Without this, nodes[0] raises
                            # IndexError on an empty result and stops the whole
                            # relay, or a console is opened for a node type
                            # that is not allowed.
                            continue

                        node = nodes[0]
                        gns3_node = get_gns3_node(node.project.gns3_id,
                                                  node.gns3_id)
                        if gns3_node['console_type'] != 'telnet':
                            # We can only handle telnet consoles, put on the ignore list
                            sessions[key] = None
                            continue

                        # A console bound to the wildcard address is reached over loopback
                        if gns3_node['console_host'] == '::':
                            host = '::1'
                        elif gns3_node['console_host'] == '0.0.0.0':
                            host = '127.0.0.1'
                        else:
                            host = gns3_node['console_host']

                        port = gns3_node['console']
                        try:
                            session = LabTelnet(key, host, port)
                            print_notice(
                                _("Telnet connection to {} {} established").
                                format(host, port))
                        except ConnectionRefusedError:
                            print_warning(
                                _("Connection refused by {} {}").format(
                                    host, port))
                            continue

                        sockets.append(session)
                        sessions[key] = session

                    # If there is no session then ignore the data
                    if not session:
                        continue

                    # Write the data to the session
                    try:
                        session.write(data['data'].encode())
                    except EOFError:
                        # Connection closed, clean up. In this branch `s` is
                        # the Redis descriptor, so the telnet session itself
                        # must be removed, not `s`.
                        print_warning(
                            _("Telnet connection to {} {} closed").format(
                                session.host, session.port))
                        sockets.remove(session)
                        del sessions[key]
                    except (OSError, IOError) as e:
                        print_exc()
                        print_error(e)
                else:
                    # Data from telnet to websocket
                    exercise_id, node_id = s.key
                    try:
                        data = s.read_eager()
                        redis_publisher = LabPublisher(
                            facility='{}/events'.format(exercise_id),
                            broadcast=True)
                        redis_publisher.publish_message(
                            RedisMessage(
                                json.dumps({
                                    'type': 'terminal-output',
                                    'node_id': node_id,
                                    'data': b64encode(data).decode('ascii'),
                                })))
                    except EOFError:
                        # Connection closed, clean up
                        print_warning(
                            _("Telnet connection to {} {} closed").format(
                                s.host, s.port))
                        sockets.remove(s)
                        del sessions[s.key]
                    except (OSError, IOError) as e:
                        print_exc()
                        print_error(e)
    except Exception as e:
        print_exc()
        print_error(e)
def run():
    """
    Collect state from connected nodes and forward IRR requests to them.

    A TCP socket listens on the address and port from
    ``settings.STATE_COLLECTOR``. Alongside it the function subscribes to
    the ``server:*/events`` Redis channels and multiplexes everything with
    ``select``:

    * every accepted connection is wrapped in a StateConnection and asked
      for its ID;
    * ``irr-query`` and ``irr-update`` messages from Redis are passed to the
      StateConnection of the addressed exercise node. If there is none, a
      "Server is not yet available" response is published instead;
    * readable StateConnections collect their data and are closed and
      dropped when ``collect_data()`` returns a falsy value.

    Any exception ends the loop; it is logged and the function returns.
    """
    try:
        address = settings.STATE_COLLECTOR['ADDRESS']
        port = settings.STATE_COLLECTOR['PORT']
        print_notice(
            _("Listening for state updates on {addr}:{port}").format(
                addr=address, port=port))

        listen_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM,
                                    socket.IPPROTO_TCP)
        listen_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        listen_sock.setblocking(False)
        listen_sock.bind((address, port))
        listen_sock.listen(128)

        redis = StrictRedis(**private_settings.WS4REDIS_CONNECTION)
        subscriber = redis.pubsub()
        subscriber.psubscribe('server:*/events')

        # noinspection PyProtectedMember
        redis_fd = subscriber.connection._sock.fileno()

        # The listener, the Redis descriptor and all live StateConnections
        watched = [listen_sock, redis_fd]
        while watched:
            ready = select.select(watched, [], [])[0]
            for source in ready:
                if source is listen_sock:
                    # New node connecting
                    connection, client_address = source.accept()
                    print_message(
                        "Incoming state connection from {addr}".format(
                            addr=client_address))
                    connection.setblocking(False)
                    watched.append(StateConnection(connection))

                    # Ask for ID
                    connection.send(b"*****[ ID ]*****\n")
                    connection.send(b"*****[ END ]*****\n")

                elif source is redis_fd:
                    # Message published on one of the subscribed channels
                    envelope = subscriber.parse_response()
                    if envelope.pop(0) != b'pmessage':
                        continue

                    _pattern, channel, payload = envelope
                    match = channel_pattern.match(channel)
                    if not match:
                        print_warning(
                            _("Malformed channel name: {}").format(channel))
                        continue

                    exercise_id = int(match.group(1))
                    data = json.loads(payload)
                    if 'type' not in data or 'node_id' not in data:
                        print_warning(
                            _("Malformed terminal input: {}").format(payload))
                        continue

                    if data['type'] not in ['irr-query', 'irr-update']:
                        # Some other kind of event, ignore it
                        continue

                    node_id = int(data['node_id'])

                    # Find the connection belonging to this exercise node
                    target = next(
                        (candidate for candidate in watched
                         if isinstance(candidate, StateConnection)
                         and candidate.node
                         and candidate.node.project_id == exercise_id
                         and candidate.node.id == node_id),
                        None)
                    if target is None:
                        print_warning(
                            _("No existing connection found for exercise {exercise} node {node}"
                              ).format(exercise=exercise_id, node=node_id))
                        reply = json.dumps(
                            {
                                'type': data['type'] + '-response',
                                'node': node_id,
                                'response': 'Server is not yet available',
                            },
                            cls=DjangoJSONEncoder)
                        redis_publisher = LabPublisher(
                            facility='{}/events'.format(exercise_id),
                            broadcast=True)
                        redis_publisher.publish_message(RedisMessage(reply))
                        continue

                    target.send_message(data)

                else:
                    # Data from one of the state connections
                    if not source.collect_data():
                        # End of connection
                        print_message(
                            "Lost state connection from {addr}".format(
                                addr=source.connection.getpeername()))
                        watched.remove(source)
                        source.close()
    except Exception as e:
        print_error(e)
def analyse_testrun(pk):
    """
    Aggregate the scores of all InstanceRuns into their TestRun.

    Nothing happens while ``retry_all`` reports that the child InstanceRuns
    are not all analysed, or when the TestRun is already analysed. Otherwise
    the image, resource and overall score of the TestRun become the mean of
    the corresponding InstanceRun scores. In addition a TestRunAverage is
    created or updated per Marvin instance type with the database averages
    of the underlying InstanceRunResults.

    :param pk: Primary key of the TestRun to analyse.
    :raises RetryTaskException: Re-raised as-is, and raised for any
        unexpected error after it has been logged.
    """
    from measurements.models import (TestRun, InstanceRun, InstanceRunResult,
                                     TestRunAverage)

    score_fields = ('image_score', 'resource_score', 'overall_score')

    try:
        analysed_flags = InstanceRun.objects.filter(
            testrun_id=pk).values_list('analysed', flat=True)
        if not retry_all(analysed_flags):
            return

        run = TestRun.objects.select_for_update().get(pk=pk)
        if run.analysed:
            return

        print_notice(
            _("Analysing TestRun {run.pk} ({run.url})").format(run=run))

        # One row per InstanceRun, columns in the order of score_fields
        rows = InstanceRun.objects.filter(
            testrun_id=pk).values_list(*score_fields)
        for column, field in enumerate(score_fields):
            setattr(run, field, mean([row[column] for row in rows]))

        # Averages per instance type, calculated by the database
        per_type = InstanceRunResult.objects \
            .filter(instancerun__testrun_id=pk) \
            .values('marvin__instance_type') \
            .annotate(**{field: Avg(field) for field in score_fields})

        for entry in per_type:
            TestRunAverage.objects.update_or_create(
                defaults={field: entry[field] for field in score_fields},
                testrun_id=pk,
                instance_type=entry['marvin__instance_type'])

        run.analysed = timezone.now()
        run.save()

    except RetryTaskException:
        raise
    except TestRun.DoesNotExist:
        print_warning(_("TestRun {pk} does not exist anymore").format(pk=pk))
        return
    except Exception as ex:
        failing_line = sys.exc_info()[-1].tb_lineno
        print_error(
            _('{name} on line {line}: {msg}').format(
                name=type(ex).__name__, line=failing_line, msg=ex))
        raise RetryTaskException
def sync_projects_to_db():
    """
    Bring the projects and nodes in the database in line with the GNS3 server.

    For every project in the database the matching server project is looked
    up by its GNS3 id (case-insensitive). The project is opened on the
    server, and name changes of the project and its nodes as well as MAC
    address changes of nodes are copied to the database. Projects and nodes
    that are missing on the server are only reported, not deleted.

    Exercises get extra treatment: one that has running nodes and is past
    its deadline is stopped, and one whose deadline passed more than a week
    ago is deleted. An IntegrityError while handling a project is logged and
    the next project is processed.
    """
    print_debug("Synchronising projects")

    # What the GNS3 server currently has
    server_projects = get_gns3_projects(session=session)

    for project in Project.objects.select_subclasses():
        try:
            server_project = next(
                (candidate for candidate in server_projects
                 if candidate['project_id'].lower() == str(project.gns3_id).lower()),
                None)
            if server_project is None:
                # Unknown on the server: report it but keep the database record
                print_warning(
                    "- " +
                    _("Project {project.name} disappeared from GNS3 server"
                      ).format(project=project))
                continue

            # Open the project so its data becomes available in the API
            session.post(gns3_base_url + '/v2/projects/' +
                         server_project['project_id'] + '/open')

            server_project_name = server_project['name']
            if server_project_name != project.name:
                print_message("- " +
                              _("Project {old_name} renamed to {new_name}"
                                ).format(old_name=project.name,
                                         new_name=server_project_name))
                project.name = server_project_name
                project.save()

            # Now the nodes of this project
            server_nodes = get_gns3_nodes(server_project['project_id'],
                                          session=session)
            for node in project.node_set.select_subclasses():
                server_node = next(
                    (candidate for candidate in server_nodes
                     if candidate['node_id'].lower() == str(node.gns3_id).lower()),
                    None)
                if server_node is None:
                    # Unknown on the server: report it but keep the database record
                    print_warning("- " + _(
                        "Node {node.name} of project {project.name} disappeared from GNS3 server"
                    ).format(node=node, project=project))
                    continue

                server_node_name = server_node['name']
                if server_node_name != node.name:
                    print_message(
                        "- " +
                        _("Project {project.name} node {old} renamed to {new}"
                          ).format(project=project,
                                   old=node.name,
                                   new=server_node_name))
                    node.name = server_node_name
                    node.save()

                server_mac = server_node['properties']['mac_address']
                if server_mac != node.mac_address:
                    print_message(
                        "- " +
                        _("Project {project.name} node {node.name} "
                          "change MAC fix_address from {old} to {new}").format(
                              project=project,
                              node=node,
                              old=node.mac_address,
                              new=server_mac))
                    node.mac_address = server_mac
                    node.save()

                if isinstance(node, ExerciseNode):
                    node.gns3_update_monitor_option(session=session)

            if isinstance(project, Exercise):
                # Number of nodes the server reports as started
                running = sum(1 for server_node in server_nodes
                              if server_node['status'] == 'started')
                if running and project.deadline and project.deadline < timezone.now():
                    print_notice(
                        _("Stopping exercise {}").format(project.name))
                    project.gns3_stop(session=session)

                # Exercises more than a week past their deadline are removed
                if project.deadline and project.deadline < timezone.now() - timedelta(weeks=1):
                    print_notice(
                        _("Deleting exercise {}").format(project.name))
                    project.delete()
        except IntegrityError:
            print_error(" - " +
                        _("Template is still referenced, leaving it for now"))