Example #1
0
    def stream_artifact_in_chunks(self, job_id, kind, name, path, binary=False):
        """Stream job artifact to server in chunks"""

        file_size = os.path.getsize(path)
        object_id = generate_object_id(job_id, kind, name)
        object_sha = sha256_of_file(path)
        num_chunks = len(list(self._get_chunks(file_size)))

        query = Command.new(self.__ws_client.socket(), action='chunk-stream-query', object_id=object_id)
        query_result = self.__ws_client.send(query, assertions={'message': 'ok'})
        existing_chunk_shas = {}
        if 'stored_chunk_shas' in query_result and query_result['stored_chunk_shas'] != '':
            existing_chunk_shas = dict(item.split(":") for item in query_result['stored_chunk_shas'].split(","))
            log.debug("found existing stored chunks: {}".format(existing_chunk_shas))

        matching_uploaded_chunks = 0
        try:
            for chunk_start, chunk_size, chunk_id in self._get_chunks(file_size):
                # skip if server already has chunk stored
                if str(chunk_id) in existing_chunk_shas:
                    log.info("chunk {} already exists on server skipping upload".format(chunk_id))
                    continue

                log.info("sending artifact[{}][{}] chunk: {}".format(name, object_id, chunk_id))

                command = Command.new(self.__ws_client.socket(), action='chunk-stream', test_id=job_id, file_size=file_size,
                                      num_of_chunks=num_chunks, chunk_id=chunk_id, object_id=object_id,
                                      object_sha=object_sha, chunk_size=chunk_size,
                                      kind=kind, name=name, eof=EOF_MARKER, keepalive=KEEPALIVE_MARKER, binary=binary)
                response = self.__ws_client.send(command, assertions={'message': 'ready'})

                chunk_sha = hashlib.sha256()
                with open(path, 'rb') as fh:
                    fh.seek(chunk_start, os.SEEK_SET)
                    while fh.tell() < (chunk_start + chunk_size):
                        byte_size = 512 if fh.tell() + 512 < (chunk_start + chunk_size) else (chunk_start + chunk_size) - fh.tell()
                        data = fh.read(byte_size)
                        chunk_sha.update(data)
                        data = base64.b64encode(data)
                        self.__ws_client.send(data)
                    self.__ws_client.send(base64.b64encode(EOF_MARKER))
                    response = self.__ws_client.receive(response, assertions={'message': 'chunk_received', 'done': True})

                    if 'chunk_sha' in response and chunk_sha.hexdigest() == response['chunk_sha']:
                        matching_uploaded_chunks += 1
                    else:
                        log.error('chunk upload failed: response[{}], objectid: [{}], chunkid: [{}], totalchunks: [{}], name: [{}]'
                                  .format(response, object_id, chunk_id, num_chunks, name))
        finally:
            uploaded_successfully = matching_uploaded_chunks == num_chunks
            log.info("UPLOADED SUCCESS: {}".format(uploaded_successfully))
            self.__ws_client.send(Command.new(self.__ws_client.socket(), action='chunk-stream-complete', successful=uploaded_successfully,
                                  test_id=job_id, object_id=object_id, kind=kind, name=name),
                      assertions={'message': 'ok'})
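
Note: the chunked upload above relies on a `_get_chunks(file_size)` helper that is not included in these examples. A minimal sketch of what such a generator could look like, assuming a fixed chunk size (the `CHUNK_SIZE` name and the 10 MB value are assumptions, not taken from the project):

    CHUNK_SIZE = 10 * 1024 * 1024  # assumed chunk size; the real constant is not shown in these examples

    def _get_chunks(self, file_size):
        """Yield (chunk_start, chunk_size, chunk_id) tuples that cover the whole file."""
        offset = 0
        chunk_id = 0
        while offset < file_size:
            size = min(CHUNK_SIZE, file_size - offset)
            yield (offset, size, chunk_id)
            offset += size
            chunk_id += 1

This shape matches how the method is consumed above: `num_chunks = len(list(self._get_chunks(file_size)))` and the `for chunk_start, chunk_size, chunk_id in ...` loop.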
Example #2
0
 def __authenticate(self, command):
     """Sign the token the server asked us to sign.
     Send it back.
     Give the server a token of our own to sign.
     Verify it."""
     assert command.get('action') == 'authenticate'
     data = {'signature': self.__client_key.sign_message(command['token']),
             'cluster': self.__cluster_name}
     response = command.respond(**data)
     if not response.get('authenticated'):
         raise UnauthenticatedError("Our peer could not validate our signed auth token")
     # cool, the server authenticated us, now we need to
     # authenticate the server:
     token = random_token()
     cmd = Command.new(self.ws, action='authenticate', token=token)
     response = cmd.send()
     signature = response['signature']
     # Verify the signature, raises BadSignatureError if it fails:
     try:
         self.__server_key.verify_message(token, signature)
     except:
         response.respond(message='Bad Signature of token for authentication', done=True)
         log.error('server provided bad signature for auth token')
         raise
     response.respond(authenticated=True, done=True)
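
The handshake above assumes key objects exposing `sign_message(token)` and `verify_message(token, signature)`, with `verify_message` raising `BadSignatureError` on a mismatch. The project signs with API key pairs; as an illustration of that interface only (not the project's actual implementation), a shared-secret HMAC stand-in could look like this:

import hashlib
import hmac


class BadSignatureError(Exception):
    pass


class SharedSecretKey(object):
    """Illustrative HMAC stand-in for the asymmetric key objects used above."""

    def __init__(self, secret):
        self._secret = secret

    def sign_message(self, token):
        # Sign by computing an HMAC-SHA256 over the challenge token:
        return hmac.new(self._secret, token, hashlib.sha256).hexdigest()

    def verify_message(self, token, signature):
        # Recompute and compare in constant time (compare_digest needs Python 2.7.7+ / 3.3+):
        if not hmac.compare_digest(self.sign_message(token), signature):
            raise BadSignatureError('signature does not match token')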
Example #3
0
    def authenticate():
        token_to_sign = random_token()
        cmd = Command.new(ws, action='authenticate', token=token_to_sign)
        response = cmd.send()
        context['cluster'] = cluster = response['cluster']
        client_pubkey = db.get_pub_key(cluster)
        client_apikey = APIKey(client_pubkey['pubkey'])

        # Verify the client correctly signed the token:
        try:
            client_apikey.verify_message(token_to_sign,
                                         response.get('signature'))
        except:
            response.respond(
                message='Bad Signature of token for authentication', done=True)
            log.error('client provided bad signature for auth token')
            raise

        response.respond(authenticated=True, done=True)

        # Client will ask us to authenticate too:
        command = receive_data(ws)
        assert command.get('action') == 'authenticate'
        data = {'signature': context['apikey'].sign_message(command['token'])}
        response = command.respond(**data)
        if not response.get('authenticated'):
            raise UnauthenticatedError(
                "Our peer could not validate our signed auth token")
Example #4
0
 def __authenticate(self, command):
     """Sign the token the server asked us to sign.
     Send it back.
     Give the server a token of our own to sign.
     Verify it."""
     assert command.get('action') == 'authenticate'
     data = {'signature': self.__client_key.sign_message(command['token']),
             'cluster':   self.__cluster_name}
     response = command.respond(**data)
     if not response.get('authenticated'):
         raise UnauthenticatedError("Our peer could not validate our signed auth token")
     # cool, the server authenticated us, now we need to
     # authenticate the server:
     token = random_token()
     cmd = Command.new(self.socket(), action='authenticate', token=token)
     response = cmd.send()
     signature = response['signature']
     # Verify the signature, raises BadSignatureError if it fails:
     try:
         self.__server_key.verify_message(token, signature)
     except:
         response.respond(message='Bad Signature of token for authentication', done=True)
         log.error('server provided bad signature for auth token')
         raise
     response.respond(authenticated=True, done=True)
Example #5
0
    def authenticate():
        token_to_sign = random_token()
        cmd = Command.new(ws, action='authenticate', token=token_to_sign)
        response = cmd.send()
        context['cluster'] = cluster = response['cluster']
        client_pubkey = db.get_pub_key(cluster)
        client_apikey = APIKey(client_pubkey['pubkey'])
        
        # Verify the client correctly signed the token:
        try:
            client_apikey.verify_message(token_to_sign, response.get('signature'))
        except:
            response.respond(message='Bad Signature of token for authentication', done=True)
            log.error('client provided bad signature for auth token')
            raise

        response.respond(authenticated=True, done=True)

        # Client will ask us to authenticate too:
        command = receive_data(ws)
        assert command.get('action') == 'authenticate'
        data = {'signature': context['apikey'].sign_message(command['token'])}
        response = command.respond(**data)
        if not response.get('authenticated'):
            raise UnauthenticatedError("Our peer could not validate our signed auth token")
Example #6
0
    def stream_artifact(self, job_id, kind, name, path, binary=False):
        """Stream job artifact to server"""
        # Inform the server we will be streaming an artifact:
        command = Command.new(self.__ws_client.socket(),
                              action='stream',
                              test_id=job_id,
                              kind=kind,
                              name=name,
                              eof=EOF_MARKER,
                              keepalive=KEEPALIVE_MARKER,
                              binary=binary)
        response = self.__ws_client.send(command,
                                         assertions={'message': 'ready'})

        fsize = format_bytesize(os.stat(path).st_size)
        with open(path, 'rb') as f:
            log.info('Streaming {name} - {path} ({fsize})'.format(name=name,
                                                                  path=path,
                                                                  fsize=fsize))
            while True:
                data = f.read(512)
                if data == '':
                    break
                data = base64.b64encode(data)
                self.__ws_client.send(data)
        self.__ws_client.send(base64.b64encode(EOF_MARKER))
        self.__ws_client.receive(response,
                                 assertions={
                                     'message': 'stream_received',
                                     'done': True
                                 })
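
The receiving side of this stream is not part of these examples. A hedged sketch of how a server could reassemble the base64 frames into a file, assuming `ws.receive()` returns one frame at a time and that both ends share the same EOF_MARKER/KEEPALIVE_MARKER constants:

import base64


def receive_stream_to_file(ws, dest_path):
    """Illustrative only: write decoded frames to dest_path until EOF_MARKER arrives."""
    with open(dest_path, 'wb') as out:
        while True:
            frame = base64.b64decode(ws.receive())
            if frame == EOF_MARKER:
                break
            if frame == KEEPALIVE_MARKER:
                continue  # heartbeat frame, nothing to write
            out.write(frame)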
Example #7
0
 def __job_done(self, job_id, status='completed', message=None, stacktrace=None):
     """Tell the server we're done with a job, and give it the test artifacts"""
     ##{type:'command', command_id:'llll', action:'test_done', test_id:'xxxxxxx'}
     command = Command.new(self.ws, action="test_done", test_id=job_id, status=status)
     if message is not None:
         command['message'] = message
     if stacktrace is not None:
         command['stacktrace'] = stacktrace
     log.debug("Sending job completion message for {test_id} ...".format(test_id=job_id))
     response = command.send()
     ##{type:'response', command_id:'llll', test_id:'xxxxxx', message='test_update', done:true}
     assert response['test_id'] == job_id
     assert response['message'] == 'test_update'
     assert response['done'] == True
     log.debug("Server confirms job {test_id} is complete.".format(test_id=job_id))
Example #8
0
 def __job_done(self, job_id, status='completed', message=None, stacktrace=None):
     """Tell the server we're done with a job, and give it the test artifacts"""
     ##{type:'command', command_id:'llll', action:'test_done', test_id:'xxxxxxx'}
     command = Command.new(self.__ws_client.socket(), action="test_done", test_id=job_id, status=status)
     if message is not None:
         command['message'] = message
     if stacktrace is not None:
         command['stacktrace'] = stacktrace
     log.debug("Sending job completion message for {test_id} ...".format(test_id=job_id))
     response = command.send()
     ##{type:'response', command_id:'llll', test_id:'xxxxxx', message='test_update', done:true}
     assert response['test_id'] == job_id
     assert response['message'] == 'test_update'
     assert response['done'] == True
     log.debug("Server confirms job {test_id} is complete.".format(test_id=job_id))
Example #9
0
    def stream_artifact(self, job_id, kind, name, path, binary=False):
        """Stream job artifact to server"""
        # Inform the server we will be streaming an artifact:
        command = Command.new(self.__ws_client.socket(), action='stream', test_id=job_id,
                              kind=kind, name=name, eof=EOF_MARKER, keepalive=KEEPALIVE_MARKER, binary=binary)
        response = self.__ws_client.send(command, assertions={'message': 'ready'})

        fsize = format_bytesize(os.stat(path).st_size)
        with open(path, 'rb') as f:
            log.info('Streaming {name} - {path} ({fsize})'.format(name=name, path=path, fsize=fsize))
            while True:
                data = f.read(512)
                if data == '':
                    break
                data = base64.b64encode(data)
                self.__ws_client.send(data)
        self.__ws_client.send(base64.b64encode(EOF_MARKER))
        self.__ws_client.receive(response, assertions={'message': 'stream_received', 'done': True})
Example #10
0
 def __get_work(self):
     """Ask the server for work"""
     command = Command.new(self.ws, action='get_work')
     response = command.send()
     while True:
         # We either got a job, or we received a wait request:
         if response.get('action') == 'wait':
             response = response.receive()
             continue
         elif 'test' in response:
             break
         else:
             raise AssertionError(
                 'Response was neither a wait action, nor contained '
                 'any test for us to run: {response}'.format(response=response))
     job = response['test']
     test_id = job['test_id']
     response = response.respond(test_id=test_id, status='prepared')
     assert response['status'] == 'in_progress'
     return job
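
Read against the code above, the exchange follows roughly this shape (annotated in the same ##{...} style used elsewhere in these examples; the envelope fields are assumptions, only the action/test/status values come from the code):

 ##{type:'command', command_id:'llll', action:'get_work'}
 ##{type:'response', command_id:'llll', action:'wait'}                         <- no work yet, client keeps waiting
 ##{type:'response', command_id:'llll', test:{test_id:'xxxxxxx', ...}}         <- a job to run
 ##{type:'response', command_id:'llll', test_id:'xxxxxxx', status:'prepared'}  <- client acknowledgement
 ##{type:'response', command_id:'llll', status:'in_progress'}                  <- server confirmation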
Example #11
0
 def __get_work(self):
     """Ask the server for work"""
     command = Command.new(self.__ws_client.socket(), action='get_work')
     response = command.send()
     while True:
         # We either got a job, or we received a wait request:
         if response.get('action') == 'wait':
             response = response.receive()
             continue
         elif 'test' in response:
             break
         else:
             raise AssertionError(
                 'Response was neither a wait action, nor contained '
                 'any test for us to run: {response}'.format(response=response))
     job = response['test']
     test_id = job['test_id']
     response = response.respond(test_id=test_id, status='prepared')
     assert response['status'] == 'in_progress'
     return job
Example #12
0
 def __good_bye(self):
     """Tell the server we're disconnecting"""
     command = Command.new(self.socket(), action="good_bye")
     log.debug("Sending goodbye message to server..")
     command.send(await_response=False)
Example #13
0
    def perform_job(self, job):
        """Perform a job the server gave us, stream output and artifacts to the given websocket."""
        job = copy.deepcopy(job['test_definition'])
        # Cleanup the job structure according to what stress_compare needs:
        for operation in job['operations']:
            operation['type'] = operation['operation']
            del operation['operation']

        job_dir = os.path.join(os.path.expanduser('~'),'.cstar_perf','jobs',job['test_id'])
        mkpath(job_dir)
        stats_path = os.path.join(job_dir,'stats.{test_id}.json'.format(test_id=job['test_id']))
        summary_path = os.path.join(job_dir,'stats_summary.{test_id}.json'.format(test_id=job['test_id']))
        stress_log_path = os.path.join(job_dir,'stress_compare.{test_id}.log'.format(test_id=job['test_id']))

        stress_json = json.dumps(dict(revisions=job['revisions'],
                                      operations=job['operations'],
                                      title=job['title'],
                                      leave_data=job.get('leave_data', False),
                                      log=stats_path))

        # Create a temporary location to store the stress_compare json file:
        stress_json_path = os.path.join(job_dir, 'test.{test_id}.json'.format(test_id=job['test_id']))
        with open(stress_json_path, 'w') as f:
            f.write(stress_json)

        # Inform the server we will be streaming the console output to them:
        command = Command.new(self.__ws_client.socket(), action='stream', test_id=job['test_id'],
                              kind='console', name="stress_compare.{test_id}.log".format(test_id=job['test_id']),
                              eof=EOF_MARKER, keepalive=KEEPALIVE_MARKER)
        response = self.__ws_client.send(command, assertions={'message':'ready'})

        # Start a status checking thread.
        # If a user cancels the job after it's marked in_progress, we
        # need to periodically check for that state change and kill
        # our test:
        cancel_checker = JobCancellationTracker(urlparse.urlparse(self.ws_endpoint).netloc, job['test_id'])
        cancel_checker.start()

        # stats file observer
        # looks for changes to update server with status progress message
        observer = Observer()
        observer.schedule(UpdateServerProgressMessageHandler(job, urlparse.urlparse(self.ws_endpoint).netloc),
                          os.path.join(os.path.expanduser("~"), '.cstar_perf', 'jobs'),
                          recursive=True)
        observer.start()

        # Run stress_compare in a separate process, collecting the
        # output as an artifact:
        try:
            # Run stress_compare with pexpect. subprocess.Popen didn't
            # work due to some kind of tty issue when invoking
            # nodetool.
            stress_proc = pexpect.spawn('cstar_perf_stress {stress_json_path}'.format(stress_json_path=stress_json_path), timeout=None)
            with open(stress_log_path, 'w') as stress_log:
                while True:
                    try:
                        with timeout(25):
                            line = stress_proc.readline()
                            if line == '':
                                break
                            stress_log.write(line)
                            sys.stdout.write(line)
                            self.__ws_client.send(base64.b64encode(line))
                    except TimeoutError:
                        self.__ws_client.send(base64.b64encode(KEEPALIVE_MARKER))
        finally:
            cancel_checker.stop()
            observer.stop()
            self.__ws_client.send(base64.b64encode(EOF_MARKER))

        response = self.__ws_client.receive(response, assertions={'message': 'stream_received', 'done': True})

        # Find the log tarball for each revision by introspecting the stats json:
        system_logs = []
        flamegraph_logs = []
        yourkit_logs = []
        log_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf', 'logs')
        flamegraph_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf', 'flamegraph')
        yourkit_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf', 'yourkit')
        # Create a stats summary file without voluminous interval data
        if os.path.isfile(stats_path):
            with open(stats_path) as stats:
                stats = json.loads(stats.read())
                for rev in stats['revisions']:
                    system_logs.append(os.path.join(log_dir, "{name}.tar.gz".format(name=rev['last_log'])))
                    fg_path = os.path.join(flamegraph_dir, "{name}.tar.gz".format(name=rev['last_log']))
                    yourkit_path = os.path.join(yourkit_dir, "{name}.tar.gz".format(name=rev['last_log']))
                    if os.path.exists(fg_path):
                        flamegraph_logs.append(fg_path)
                    if os.path.exists(yourkit_path):
                        yourkit_logs.append(yourkit_path)
                with open(summary_path, 'w') as summary:
                    hadStats = False
                    for op in stats['stats']:
                        if op['type'] == 'stress':
                            try:
                                del op['intervals']
                                hadStats = True
                            except KeyError:
                                pass
                        try:
                            del op['output']
                        except KeyError:
                            pass
                    if hadStats:
                        json.dump(obj=stats, fp=summary, sort_keys=True, indent=4, separators=(',', ': '))

        # Make a new tarball containing all the revision logs:
        tmptardir = tempfile.mkdtemp()
        try:
            job_log_dir = os.path.join(tmptardir, 'cassandra_logs.{test_id}'.format(test_id=job['test_id']))
            os.mkdir(job_log_dir)
            for x, syslog in enumerate(system_logs, 1):
                with tarfile.open(syslog) as tar:
                    tar.extractall(job_log_dir)
                    os.rename(os.path.join(job_log_dir, tar.getnames()[0]), os.path.join(job_log_dir, 'revision_{x:02d}'.format(x=x)))
            system_logs_path = os.path.join(job_dir, 'cassandra_logs.{test_id}.tar.gz'.format(test_id=job['test_id']))
            with tarfile.open(system_logs_path, 'w:gz') as tar:
                with cd(tmptardir):
                    tar.add('cassandra_logs.{test_id}'.format(test_id=job['test_id']))
            assert os.path.exists(system_logs_path)
        finally:
            shutil.rmtree(tmptardir)

        # Make a new tarball containing all the flamegraph logs and data
        if flamegraph_logs:
            tmptardir = tempfile.mkdtemp()
            try:
                flamegraph_tmp_dir = os.path.join(tmptardir, 'flamegraph_logs.{test_id}'.format(test_id=job['test_id']))
                os.mkdir(flamegraph_tmp_dir)
                for x, flamegraph in enumerate(flamegraph_logs, 1):
                    with tarfile.open(flamegraph) as tar:
                        tar.extractall(flamegraph_tmp_dir)
                        tmp_dir = os.path.join(flamegraph_tmp_dir, tar.getnames()[0])

                        # Copy all flamegraph as artifacts
                        for node_dir in os.listdir(tmp_dir):
                            glob_match = os.path.join(os.path.join(tmp_dir, node_dir), '*.svg')
                            graphs = glob.glob(glob_match)
                            for graph in graphs:
                                graph_name = os.path.basename(graph).replace(
                                    'flamegraph_', 'flamegraph_{}_{}_'.format(job['test_id'], node_dir))
                                graph_dst_filename = os.path.join(job_dir, graph_name)
                                shutil.copyfile(graph, graph_dst_filename)

                        os.rename(tmp_dir, os.path.join(flamegraph_tmp_dir, 'revision_{x:02d}'.format(x=x)))

                flamegraph_job_path = os.path.join(job_dir, 'flamegraph_logs.{test_id}.tar.gz'.format(test_id=job['test_id']))
                with tarfile.open(flamegraph_job_path, 'w:gz') as tar:
                    with cd(tmptardir):
                        tar.add('flamegraph_logs.{test_id}'.format(test_id=job['test_id']))
                assert os.path.exists(flamegraph_job_path)
            finally:
                shutil.rmtree(tmptardir)

        # Make a new tarball containing all the yourkit data
        if yourkit_logs:
            tmptardir = tempfile.mkdtemp()
            try:
                yourkit_tmp_dir = os.path.join(tmptardir, 'yourkit.{test_id}'.format(test_id=job['test_id']))
                os.mkdir(yourkit_tmp_dir)
                for x, yourkit in enumerate(yourkit_logs, 1):
                    with tarfile.open(yourkit) as tar:
                        tar.extractall(yourkit_tmp_dir)
                        tmp_dir = os.path.join(yourkit_tmp_dir, tar.getnames()[0])
                        os.rename(tmp_dir, os.path.join(yourkit_tmp_dir, 'revision_{x:02d}'.format(x=x)))

                yourkit_job_path = os.path.join(job_dir, 'yourkit.{test_id}.tar.gz'.format(test_id=job['test_id']))
                with tarfile.open(yourkit_job_path, 'w:gz') as tar:
                    with cd(tmptardir):
                        tar.add('yourkit.{test_id}'.format(test_id=job['test_id']))
                assert os.path.exists(yourkit_job_path)
            finally:
                shutil.rmtree(tmptardir)

        ## Stream artifacts
        ## Write final job status to 0.job_status file
        final_status = 'local_complete'
        try:
            # Stream artifacts:
            self.stream_artifacts(job['test_id'])
            if self.__ws_client.in_sync():
                final_status = 'server_complete'

            # Spot check stats to ensure it has the data it should
            # contain. Raises JobFailure if something's amiss.
            try:
                self.__spot_check_stats(job, stats_path)
            except JobFailure:
                if final_status == 'server_complete':
                    final_status = 'server_fail'
                else:
                    final_status = 'local_fail'
                raise
        finally:
            with open(os.path.join(job_dir, '0.job_status'), 'w') as f:
                f.write(final_status)
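
`timeout(25)` is used above as a context manager but never defined in these examples. A minimal signal-based sketch (Unix only; `TimeoutError` here is the custom exception the loop catches, not the Python 3 builtin):

import signal
from contextlib import contextmanager


class TimeoutError(Exception):
    pass


@contextmanager
def timeout(seconds):
    """Raise TimeoutError if the wrapped block runs longer than `seconds` (Unix only)."""
    def _handler(signum, frame):
        raise TimeoutError('timed out after {} seconds'.format(seconds))
    old_handler = signal.signal(signal.SIGALRM, _handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        # Cancel the pending alarm and restore whatever handler was installed before:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)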
Example #14
0
 def __good_bye(self):
     """Tell the server we're disconnecting"""
     command = Command.new(self.socket(), action="good_bye")
     log.debug("Sending goodbye message to server..")
     command.send(await_response=False)
Example #15
0
 def __good_bye(self):
     """Tell the server we're disconnecting"""
     ##{type:'command', command_id:'llll', action:'good_bye'}
     command = Command.new(self.ws, action="good_bye")
     log.debug("Sending goodbye message to server..")
     command.send(await_response=False)
Example #16
0
 def __good_bye(self):
     """Tell the server we're disconnecting"""
     ##{type:'command', command_id:'llll', action:'good_bye'}
     command = Command.new(self.ws, action="good_bye")
     log.debug("Sending goodbye message to server..")
     command.send(await_response=False)
Example #17
0
    def perform_job(self, job, ws):
        """Perform a job the server gave us, stream output and artifacts to the given websocket."""
        job = copy.deepcopy(job['test_definition'])
        # Cleanup the job structure according to what stress_compare needs:
        for operation in job['operations']:
            operation['type'] = operation['operation']
            del operation['operation']

        job_dir = os.path.join(os.path.expanduser('~'),'.cstar_perf','jobs',job['test_id'])
        mkpath(job_dir)
        stats_path = os.path.join(job_dir,'stats.{test_id}.json'.format(test_id=job['test_id']))
        stress_log_path = os.path.join(job_dir,'stress_compare.{test_id}.log'.format(test_id=job['test_id']))

        stress_json = json.dumps(dict(revisions=job['revisions'],
                                      operations=job['operations'],
                                      title=job['title'],
                                      log=stats_path))

        # Create a temporary location to store the stress_compare json file:
        stress_json_path = os.path.join(job_dir, 'test.{test_id}.json'.format(test_id=job['test_id']))
        with open(stress_json_path, 'w') as f:
            f.write(stress_json)

        # Inform the server we will be streaming the console output to them:
        command = Command.new(self.ws, action='stream', test_id=job['test_id'], 
                              kind='console', name='console_out', eof=EOF_MARKER, keepalive=KEEPALIVE_MARKER)
        response = self.send(command, assertions={'message':'ready'})

        # Start a status checking thread.
        # If a user cancels the job after it's marked in_progress, we
        # need to periodically check for that state change and kill
        # our test:
        cancel_checker = JobCancellationTracker(urlparse.urlparse(self.ws_endpoint).netloc, job['test_id'])
        cancel_checker.start()

        # Run stress_compare in a separate process, collecting the
        # output as an artifact:
        try:
            # Run stress_compare with pexpect. subprocess.Popen didn't
            # work due to some kind of tty issue when invoking
            # nodetool.
            stress_proc = pexpect.spawn('cstar_perf_stress {stress_json_path}'.format(stress_json_path=stress_json_path), timeout=None)
            with open(stress_log_path, 'w') as stress_log:
                while True:
                    try:
                        with timeout(25):
                            line = stress_proc.readline()
                            if line == '':
                                break
                            stress_log.write(line)
                            sys.stdout.write(line)
                            self.send(base64.b64encode(line))
                    except TimeoutError:
                        self.send(base64.b64encode(KEEPALIVE_MARKER))
        finally:
            cancel_checker.stop()
            self.send(base64.b64encode(EOF_MARKER))

        response = self.receive(response, assertions={'message':'stream_received', 'done':True})

        # Find the log tarball for each revision by introspecting the stats json:
        system_logs = []
        log_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf','logs')
        
        with open(stats_path) as stats:
            stats = json.loads(stats.read())
            for rev in stats['revisions']:
                system_logs.append(os.path.join(log_dir, "{name}.tar.gz".format(name=rev['last_log'])))
        # Make a new tarball containing all the revision logs:
        tmptardir = tempfile.mkdtemp()
        try:
            job_log_dir = os.path.join(tmptardir, 'cassandra_logs.{test_id}'.format(test_id=job['test_id']))
            os.mkdir(job_log_dir)
            for x, syslog in enumerate(system_logs, 1):
                with tarfile.open(syslog) as tar:
                    tar.extractall(job_log_dir)
                    os.rename(os.path.join(job_log_dir, tar.getnames()[0]), os.path.join(job_log_dir, 'revision_{x:02d}'.format(x=x)))
            system_logs_path = os.path.join(job_dir, 'cassandra_logs.{test_id}.tar.gz'.format(test_id=job['test_id']))
            with tarfile.open(system_logs_path, 'w:gz') as tar:
                with cd(tmptardir):
                    tar.add('cassandra_logs.{test_id}'.format(test_id=job['test_id']))
            assert os.path.exists(system_logs_path)
        finally:
            shutil.rmtree(tmptardir)

        ## Stream artifacts
        ## Write final job status to 0.job_status file
        final_status = 'local_complete'
        try:
            # Stream artifacts:
            self.stream_artifacts(job['test_id'])
            if self.__server_synced:
                final_status = 'server_complete'

            # Spot check stats to ensure it has the data it should
            # contain. Raises JobFailure if something's amiss.
            try:
                self.__spot_check_stats(job, stats_path)
            except JobFailure:
                if final_status == 'server_complete':
                    final_status = 'server_fail'
                else:
                    final_status = 'local_fail'
                raise
        finally:
            with open(os.path.join(job_dir,'0.job_status'), 'w') as f:
                f.write(final_status)
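
`cd(tmptardir)` is another context manager these examples use without defining. A plausible minimal version that restores the original working directory even if tarring fails:

import os
from contextlib import contextmanager


@contextmanager
def cd(path):
    """Temporarily change the working directory, restoring the previous one on exit."""
    previous = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(previous)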
Example #18
0
    def stream_artifact_in_chunks(self,
                                  job_id,
                                  kind,
                                  name,
                                  path,
                                  binary=False):
        """Stream job artifact to server in chunks"""

        file_size = os.path.getsize(path)
        object_id = generate_object_id(job_id, kind, name)
        object_sha = sha256_of_file(path)
        num_chunks = len(list(self._get_chunks(file_size)))

        query = Command.new(self.__ws_client.socket(),
                            action='chunk-stream-query',
                            object_id=object_id)
        query_result = self.__ws_client.send(query,
                                             assertions={'message': 'ok'})
        existing_chunk_shas = {}
        if 'stored_chunk_shas' in query_result and query_result[
                'stored_chunk_shas'] != '':
            existing_chunk_shas = dict(
                item.split(":")
                for item in query_result['stored_chunk_shas'].split(","))
            log.debug(
                "found existing stored chunks: {}".format(existing_chunk_shas))

        matching_uploaded_chunks = 0
        try:
            for chunk_start, chunk_size, chunk_id in self._get_chunks(
                    file_size):
                # skip if server already has chunk stored
                if str(chunk_id) in existing_chunk_shas:
                    log.info(
                        "chunk {} already exists on server skipping upload".
                        format(chunk_id))
                    continue

                log.info("sending artifact[{}][{}] chunk: {}".format(
                    name, object_id, chunk_id))

                command = Command.new(self.__ws_client.socket(),
                                      action='chunk-stream',
                                      test_id=job_id,
                                      file_size=file_size,
                                      num_of_chunks=num_chunks,
                                      chunk_id=chunk_id,
                                      object_id=object_id,
                                      object_sha=object_sha,
                                      chunk_size=chunk_size,
                                      kind=kind,
                                      name=name,
                                      eof=EOF_MARKER,
                                      keepalive=KEEPALIVE_MARKER,
                                      binary=binary)
                response = self.__ws_client.send(
                    command, assertions={'message': 'ready'})

                chunk_sha = hashlib.sha256()
                with open(path, 'rb') as fh:
                    fh.seek(chunk_start, os.SEEK_SET)
                    while fh.tell() < (chunk_start + chunk_size):
                        byte_size = 512 if fh.tell() + 512 < (
                            chunk_start + chunk_size) else (
                                chunk_start + chunk_size) - fh.tell()
                        data = fh.read(byte_size)
                        chunk_sha.update(data)
                        data = base64.b64encode(data)
                        self.__ws_client.send(data)
                    self.__ws_client.send(base64.b64encode(EOF_MARKER))
                    response = self.__ws_client.receive(response,
                                                        assertions={
                                                            'message':
                                                            'chunk_received',
                                                            'done': True
                                                        })

                    if 'chunk_sha' in response and chunk_sha.hexdigest(
                    ) == response['chunk_sha']:
                        matching_uploaded_chunks += 1
                    else:
                        log.error(
                            'chunk upload failed: response[{}], objectid: [{}], chunkid: [{}], totalchunks: [{}], name: [{}]'
                            .format(response, object_id, chunk_id, num_chunks,
                                    name))
        finally:
            uploaded_successfully = matching_uploaded_chunks == num_chunks
            log.info("UPLOADED SUCCESS: {}".format(uploaded_successfully))
            self.__ws_client.send(Command.new(self.__ws_client.socket(),
                                              action='chunk-stream-complete',
                                              successful=uploaded_successfully,
                                              test_id=job_id,
                                              object_id=object_id,
                                              kind=kind,
                                              name=name),
                                  assertions={'message': 'ok'})
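
Both chunked-upload examples also call `sha256_of_file` and `generate_object_id` without showing them. Hedged sketches with the same call signatures (the object-id derivation below is an assumption, not the project's actual scheme):

import hashlib


def sha256_of_file(path, block_size=65536):
    """Return the hex SHA-256 digest of a file, reading it in fixed-size blocks."""
    digest = hashlib.sha256()
    with open(path, 'rb') as fh:
        for block in iter(lambda: fh.read(block_size), b''):
            digest.update(block)
    return digest.hexdigest()


def generate_object_id(job_id, kind, name):
    """Derive a stable identifier for an artifact from its job id, kind and name.
    The concatenation scheme is illustrative only."""
    return hashlib.sha256('{}:{}:{}'.format(job_id, kind, name)).hexdigest()

As a usage sketch with hypothetical values, calling `self.stream_artifact_in_chunks('job-123', 'stats', 'stats.json', '/tmp/stats.json')` would upload `/tmp/stats.json` in chunks, skipping any chunk the server already reports as stored.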
Example #19
0
    def perform_job(self, job, ws):
        """Perform a job the server gave us, stream output and artifacts to the given websocket."""
        job = copy.deepcopy(job['test_definition'])
        # Cleanup the job structure according to what stress_compare needs:
        for operation in job['operations']:
            operation['type'] = operation['operation']
            del operation['operation']

        job_dir = os.path.join(os.path.expanduser('~'),'.cstar_perf','jobs',job['test_id'])
        mkpath(job_dir)
        stats_path = os.path.join(job_dir,'stats.{test_id}.json'.format(test_id=job['test_id']))
        summary_path = os.path.join(job_dir,'stats_summary.{test_id}.json'.format(test_id=job['test_id']))
        stress_log_path = os.path.join(job_dir,'stress_compare.{test_id}.log'.format(test_id=job['test_id']))

        stress_json = json.dumps(dict(revisions=job['revisions'],
                                      operations=job['operations'],
                                      title=job['title'],
                                      log=stats_path))

        # Create a temporary location to store the stress_compare json file:
        stress_json_path = os.path.join(job_dir, 'test.{test_id}.json'.format(test_id=job['test_id']))
        with open(stress_json_path, 'w') as f:
            f.write(stress_json)

        # Inform the server we will be streaming the console output to them:
        command = Command.new(self.ws, action='stream', test_id=job['test_id'], 
                              kind='console', name='console_out', eof=EOF_MARKER, keepalive=KEEPALIVE_MARKER)
        response = self.send(command, assertions={'message':'ready'})

        # Start a status checking thread.
        # If a user cancels the job after it's marked in_progress, we
        # need to periodically check for that state change and kill
        # our test:
        cancel_checker = JobCancellationTracker(urlparse.urlparse(self.ws_endpoint).netloc, job['test_id'])
        cancel_checker.start()

        # Run stress_compare in a separate process, collecting the
        # output as an artifact:
        try:
            # Run stress_compare with pexpect. subprocess.Popen didn't
            # work due to some kind of tty issue when invoking
            # nodetool.
            stress_proc = pexpect.spawn('cstar_perf_stress {stress_json_path}'.format(stress_json_path=stress_json_path), timeout=None)
            with open(stress_log_path, 'w') as stress_log:
                while True:
                    try:
                        with timeout(25):
                            line = stress_proc.readline()
                            if line == '':
                                break
                            stress_log.write(line)
                            sys.stdout.write(line)
                            self.send(base64.b64encode(line))
                    except TimeoutError:
                        self.send(base64.b64encode(KEEPALIVE_MARKER))
        finally:
            cancel_checker.stop()
            self.send(base64.b64encode(EOF_MARKER))

        response = self.receive(response, assertions={'message':'stream_received', 'done':True})

        # Find the log tarball for each revision by introspecting the stats json:
        system_logs = []
        log_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf','logs')
        
        with open(stats_path) as stats:
            stats = json.loads(stats.read())
            for rev in stats['revisions']:
                system_logs.append(os.path.join(log_dir, "{name}.tar.gz".format(name=rev['last_log'])))
            with open(summary_path, 'w') as summary:
                for rev in job['revisions']:
                    for op_num, op in enumerate(job['operations']):
                        if op['type'] == 'stress':
                            # 'intervals' may already be gone on later passes; drop it
                            # defensively to avoid a KeyError with multiple revisions:
                            stats['stats'][op_num].pop('intervals', None)
                json.dump(obj=stats, fp=summary, sort_keys=True, indent=4, separators=(',', ': '))
        # Make a new tarball containing all the revision logs:
        tmptardir = tempfile.mkdtemp()
        try:
            job_log_dir = os.path.join(tmptardir, 'cassandra_logs.{test_id}'.format(test_id=job['test_id']))
            os.mkdir(job_log_dir)
            for x, syslog in enumerate(system_logs, 1):
                with tarfile.open(syslog) as tar:
                    tar.extractall(job_log_dir)
                    os.rename(os.path.join(job_log_dir, tar.getnames()[0]), os.path.join(job_log_dir, 'revision_{x:02d}'.format(x=x)))
            system_logs_path = os.path.join(job_dir, 'cassandra_logs.{test_id}.tar.gz'.format(test_id=job['test_id']))
            with tarfile.open(system_logs_path, 'w:gz') as tar:
                with cd(tmptardir):
                    tar.add('cassandra_logs.{test_id}'.format(test_id=job['test_id']))
            assert os.path.exists(system_logs_path)
        finally:
            shutil.rmtree(tmptardir)

        ## Stream artifacts
        ## Write final job status to 0.job_status file
        final_status = 'local_complete'
        try:
            # Stream artifacts:
            self.stream_artifacts(job['test_id'])
            if self.__server_synced:
                final_status = 'server_complete'

            # Spot check stats to ensure it has the data it should
            # contain. Raises JobFailure if something's amiss.
            try:
                self.__spot_check_stats(job, stats_path)
            except JobFailure:
                if final_status == 'server_complete':
                    final_status = 'server_fail'
                else:
                    final_status = 'local_fail'
                raise
        finally:
            with open(os.path.join(job_dir,'0.job_status'), 'w') as f:
                f.write(final_status)
Example #20
0
    def perform_job(self, job):
        """Perform a job the server gave us, stream output and artifacts to the given websocket."""
        job = copy.deepcopy(job['test_definition'])
        # Cleanup the job structure according to what stress_compare needs:
        for operation in job['operations']:
            operation['type'] = operation['operation']
            del operation['operation']

        job_dir = os.path.join(os.path.expanduser('~'), '.cstar_perf', 'jobs',
                               job['test_id'])
        mkpath(job_dir)
        stats_path = os.path.join(
            job_dir, 'stats.{test_id}.json'.format(test_id=job['test_id']))
        summary_path = os.path.join(
            job_dir,
            'stats_summary.{test_id}.json'.format(test_id=job['test_id']))
        stress_log_path = os.path.join(
            job_dir,
            'stress_compare.{test_id}.log'.format(test_id=job['test_id']))

        stress_json = json.dumps(
            dict(revisions=job['revisions'],
                 operations=job['operations'],
                 title=job['title'],
                 leave_data=job.get('leave_data', False),
                 log=stats_path))

        # Create a temporary location to store the stress_compare json file:
        stress_json_path = os.path.join(
            job_dir, 'test.{test_id}.json'.format(test_id=job['test_id']))
        with open(stress_json_path, 'w') as f:
            f.write(stress_json)

        # Inform the server we will be streaming the console output to them:
        command = Command.new(
            self.__ws_client.socket(),
            action='stream',
            test_id=job['test_id'],
            kind='console',
            name="stress_compare.{test_id}.log".format(test_id=job['test_id']),
            eof=EOF_MARKER,
            keepalive=KEEPALIVE_MARKER)
        response = self.__ws_client.send(command,
                                         assertions={'message': 'ready'})

        # Start a status checking thread.
        # If a user cancels the job after it's marked in_progress, we
        # need to periodically check for that state change and kill
        # our test:
        cancel_checker = JobCancellationTracker(
            urlparse.urlparse(self.ws_endpoint).netloc, job['test_id'])
        cancel_checker.start()

        # stats file observer
        # looks for changes to update server with status progress message
        observer = Observer()
        observer.schedule(UpdateServerProgressMessageHandler(
            job,
            urlparse.urlparse(self.ws_endpoint).netloc),
                          os.path.join(os.path.expanduser("~"), '.cstar_perf',
                                       'jobs'),
                          recursive=True)
        observer.start()

        # Run stress_compare in a separate process, collecting the
        # output as an artifact:
        try:
            # Run stress_compare with pexpect. subprocess.Popen didn't
            # work due to some kind of tty issue when invoking
            # nodetool.
            stress_proc = pexpect.spawn(
                'cstar_perf_stress {stress_json_path}'.format(
                    stress_json_path=stress_json_path),
                timeout=None)
            with open(stress_log_path, 'w') as stress_log:
                while True:
                    try:
                        with timeout(25):
                            line = stress_proc.readline()
                            if line == '':
                                break
                            stress_log.write(line)
                            sys.stdout.write(line)
                            self.__ws_client.send(base64.b64encode(line))
                    except TimeoutError:
                        self.__ws_client.send(
                            base64.b64encode(KEEPALIVE_MARKER))
        finally:
            cancel_checker.stop()
            observer.stop()
            self.__ws_client.send(base64.b64encode(EOF_MARKER))

        response = self.__ws_client.receive(response,
                                            assertions={
                                                'message': 'stream_received',
                                                'done': True
                                            })

        # Find the log tarball for each revision by introspecting the stats json:
        system_logs = []
        flamegraph_logs = []
        yourkit_logs = []
        log_dir = CSTAR_PERF_LOGS_DIR
        flamegraph_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf',
                                      'flamegraph')
        yourkit_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf',
                                   'yourkit')
        # Create a stats summary file without voluminous interval data
        if os.path.isfile(stats_path):
            with open(stats_path) as stats:
                stats = json.loads(stats.read())
                for rev in stats['revisions']:
                    last_log_rev_id = rev.get('last_log')
                    if last_log_rev_id:
                        system_logs.append(
                            os.path.join(
                                log_dir,
                                "{name}.tar.gz".format(name=last_log_rev_id)))
                        fg_path = os.path.join(
                            flamegraph_dir,
                            "{name}.tar.gz".format(name=last_log_rev_id))
                        yourkit_path = os.path.join(
                            yourkit_dir,
                            "{name}.tar.gz".format(name=last_log_rev_id))
                        if os.path.exists(fg_path):
                            flamegraph_logs.append(fg_path)
                        if os.path.exists(yourkit_path):
                            yourkit_logs.append(yourkit_path)
                with open(summary_path, 'w') as summary:
                    hadStats = False
                    for op in stats['stats']:
                        if op['type'] == 'stress':
                            try:
                                del op['intervals']
                                hadStats = True
                            except KeyError:
                                pass
                        try:
                            del op['output']
                        except KeyError:
                            pass
                    if hadStats:
                        json.dump(obj=stats,
                                  fp=summary,
                                  sort_keys=True,
                                  indent=4,
                                  separators=(',', ': '))

        # Make a new tarball containing all the revision logs:
        tmptardir = tempfile.mkdtemp()
        try:
            startup_log_tarball = self._maybe_get_startup_log_tarball(
                job['test_id'], log_dir)
            if startup_log_tarball:
                system_logs.append(startup_log_tarball)
            job_log_dir = os.path.join(
                tmptardir,
                'cassandra_logs.{test_id}'.format(test_id=job['test_id']))
            os.mkdir(job_log_dir)
            for x, syslog in enumerate(system_logs, 1):
                with tarfile.open(syslog) as tar:
                    tar.extractall(job_log_dir)
                    os.rename(
                        os.path.join(job_log_dir,
                                     tar.getnames()[0]),
                        os.path.join(job_log_dir,
                                     'revision_{x:02d}'.format(x=x)))
            system_logs_path = os.path.join(
                job_dir, 'cassandra_logs.{test_id}.tar.gz'.format(
                    test_id=job['test_id']))
            with tarfile.open(system_logs_path, 'w:gz') as tar:
                with cd(tmptardir):
                    tar.add('cassandra_logs.{test_id}'.format(
                        test_id=job['test_id']))
            assert os.path.exists(system_logs_path)
        finally:
            shutil.rmtree(tmptardir)

        # Make a new tarball containing all the flamegraph logs and data
        if flamegraph_logs:
            tmptardir = tempfile.mkdtemp()
            try:
                flamegraph_tmp_dir = os.path.join(
                    tmptardir,
                    'flamegraph_logs.{test_id}'.format(test_id=job['test_id']))
                os.mkdir(flamegraph_tmp_dir)
                for x, flamegraph in enumerate(flamegraph_logs, 1):
                    with tarfile.open(flamegraph) as tar:
                        tar.extractall(flamegraph_tmp_dir)
                        tmp_dir = os.path.join(flamegraph_tmp_dir,
                                               tar.getnames()[0])

                        # Copy all flamegraph as artifacts
                        for node_dir in os.listdir(tmp_dir):
                            glob_match = os.path.join(
                                os.path.join(tmp_dir, node_dir), '*.svg')
                            graphs = glob.glob(glob_match)
                            for graph in graphs:
                                graph_name = os.path.basename(graph).replace(
                                    'flamegraph_', 'flamegraph_{}_{}_'.format(
                                        job['test_id'], node_dir))
                                graph_dst_filename = os.path.join(
                                    job_dir, graph_name)
                                shutil.copyfile(graph, graph_dst_filename)

                        os.rename(
                            tmp_dir,
                            os.path.join(flamegraph_tmp_dir,
                                         'revision_{x:02d}'.format(x=x)))

                flamegraph_job_path = os.path.join(
                    job_dir, 'flamegraph_logs.{test_id}.tar.gz'.format(
                        test_id=job['test_id']))
                with tarfile.open(flamegraph_job_path, 'w:gz') as tar:
                    with cd(tmptardir):
                        tar.add('flamegraph_logs.{test_id}'.format(
                            test_id=job['test_id']))
                assert os.path.exists(flamegraph_job_path)
            finally:
                shutil.rmtree(tmptardir)

        # Make a new tarball containing all the yourkit data
        if yourkit_logs:
            tmptardir = tempfile.mkdtemp()
            try:
                yourkit_tmp_dir = os.path.join(
                    tmptardir,
                    'yourkit.{test_id}'.format(test_id=job['test_id']))
                os.mkdir(yourkit_tmp_dir)
                for x, yourkit in enumerate(yourkit_logs, 1):
                    with tarfile.open(yourkit) as tar:
                        tar.extractall(yourkit_tmp_dir)
                        tmp_dir = os.path.join(yourkit_tmp_dir,
                                               tar.getnames()[0])
                        os.rename(
                            tmp_dir,
                            os.path.join(yourkit_tmp_dir,
                                         'revision_{x:02d}'.format(x=x)))

                yourkit_job_path = os.path.join(
                    job_dir,
                    'yourkit.{test_id}.tar.gz'.format(test_id=job['test_id']))
                with tarfile.open(yourkit_job_path, 'w:gz') as tar:
                    with cd(tmptardir):
                        tar.add(
                            'yourkit.{test_id}'.format(test_id=job['test_id']))
                assert os.path.exists(yourkit_job_path)
            finally:
                shutil.rmtree(tmptardir)

        ## Stream artifacts
        ## Write final job status to 0.job_status file
        final_status = 'local_complete'
        try:
            # Stream artifacts:
            self.stream_artifacts(job['test_id'])
            if self.__ws_client.in_sync():
                final_status = 'server_complete'

            # Spot check stats to ensure it has the data it should
            # contain. Raises JobFailure if something's amiss.
            try:
                self.__spot_check_stats(job, stats_path)
            except JobFailure:
                if final_status == 'server_complete':
                    final_status = 'server_fail'
                else:
                    final_status = 'local_fail'
                raise
        finally:
            with open(os.path.join(job_dir, '0.job_status'), 'w') as f:
                f.write(final_status)
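
`UpdateServerProgressMessageHandler` is scheduled on the watchdog `Observer` above, but its body is not included in these examples. An illustrative sketch of such a handler, assuming the standard watchdog `FileSystemEventHandler` API; `post_progress_update` is a hypothetical stand-in for whatever call the real handler makes to the server:

import logging

from watchdog.events import FileSystemEventHandler

log = logging.getLogger(__name__)


def post_progress_update(server_netloc, test_id):
    """Hypothetical placeholder for the real progress notification to the server."""
    log.info("would send progress update for {} to {}".format(test_id, server_netloc))


class UpdateServerProgressMessageHandler(FileSystemEventHandler):
    """Illustrative sketch: watch this job's stats file and push progress updates.
    The project's real handler may differ."""

    def __init__(self, job, server_netloc):
        self.job = job
        self.server_netloc = server_netloc

    def on_modified(self, event):
        if event.is_directory:
            return
        # Only react to changes of this job's stats file:
        if 'stats.{}.json'.format(self.job['test_id']) in event.src_path:
            post_progress_update(self.server_netloc, self.job['test_id'])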