def test_simple(self):
    foo = 'aaa\naaa\naaa\n'
    result = list(chunked(foo, 5))
    assert len(result) == 3
    assert result[0] == 'aaa\n'
    assert result[1] == 'aaa\n'
    assert result[2] == 'aaa\n'

    result = list(chunked(foo, 8))
    assert len(result) == 2
    assert result[0] == 'aaa\naaa\n'
    assert result[1] == 'aaa\n'

    result = list(chunked(foo, 4))
    assert len(result) == 3
    assert result[0] == 'aaa\n'
    assert result[1] == 'aaa\n'
    assert result[2] == 'aaa\n'

    foo = 'a' * 10
    result = list(chunked(foo, 2))
    assert len(result) == 5
    assert all(r == 'aa' for r in result)

    foo = 'aaaa\naaaa'
    result = list(chunked(foo, 3))
    assert len(result) == 4
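# For reference, `chunked` itself is not defined in this section. The sketch
# below is a minimal implementation consistent with the assertions above
# (newline-aware chunking that yields at most `chunk_size` characters,
# preferring to break just after a newline); the real helper may differ.
def chunked(iterator, chunk_size):
    """Yield pieces of at most chunk_size characters, breaking after the
    last newline that fits in the current chunk when one exists."""
    result = ''
    for piece in iterator:
        result += piece
        while len(result) >= chunk_size:
            newline_pos = result.rfind('\n', 0, chunk_size)
            if newline_pos == -1:
                # No newline fits: hard-split at chunk_size.
                newline_pos = chunk_size
            else:
                # Include the newline in the emitted chunk.
                newline_pos += 1
            yield result[:newline_pos]
            result = result[newline_pos:]
    if result:
        yield result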
def _sync_artifact_as_log(self, artifact):
    jobstep = artifact.step
    job = artifact.job

    logsource, created = get_or_create(LogSource, where={
        'name': artifact.data['displayPath'],
        'job': job,
        'step': jobstep,
    }, defaults={
        'job': job,
        'project': job.project,
        'date_created': job.date_started,
    })

    offset = 0
    with closing(self.fetch_artifact(jobstep, artifact.data)) as resp:
        iterator = resp.iter_content()
        for chunk in chunked(iterator, LOG_CHUNK_SIZE):
            chunk_size = len(chunk)
            chunk, _ = create_or_update(LogChunk, where={
                'source': logsource,
                'offset': offset,
            }, values={
                'job': job,
                'project': job.project,
                'size': chunk_size,
                'text': chunk,
            })
            offset += chunk_size
def _sync_log(self, jobstep, name, job_name, build_no):
    job = jobstep.job

    logsource, created = get_or_create(LogSource, where={
        'name': name,
        'step': jobstep,
    }, defaults={
        'job': job,
        'project': jobstep.project,
        'date_created': jobstep.date_started,
    })
    if created:
        offset = 0
    else:
        offset = jobstep.data.get('log_offset', 0)

    url = '{base}/job/{job}/{build}/logText/progressiveText/'.format(
        base=jobstep.data['master'],
        job=job_name,
        build=build_no,
    )

    session = self.http_session
    with closing(session.get(url, params={'start': offset}, stream=True, timeout=15)) as resp:
        log_length = int(resp.headers['X-Text-Size'])

        # When you request an offset that doesn't exist in the build log,
        # Jenkins returns the entire log. Jenkins also provides X-Text-Size,
        # which indicates the total size of the log.
        if offset > log_length:
            return

        # XXX: requests doesn't guarantee chunk_size, so we enforce it
        # with our own helper.
        iterator = resp.iter_content()
        for chunk in chunked(iterator, LOG_CHUNK_SIZE):
            chunk_size = len(chunk)
            chunk, _ = create_or_update(LogChunk, where={
                'source': logsource,
                'offset': offset,
            }, values={
                'job': job,
                'project': job.project,
                'size': chunk_size,
                'text': chunk,
            })
            offset += chunk_size

        # Jenkins indicates there is more data when the job has yet to
        # complete.
        has_more = resp.headers.get('X-More-Data') == 'true'

    # We **must** track the log offset externally, as Jenkins embeds encoded
    # links and we can't accurately predict the next `start` param.
    jobstep.data['log_offset'] = log_length
    db.session.add(jobstep)

    return True if has_more else None
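# The comments in _sync_log describe Jenkins's progressive-log protocol:
# request with a `start` offset, advance to the server-reported X-Text-Size,
# and poll again while X-More-Data is 'true'. A standalone sketch of that
# loop (a hypothetical helper, not part of this codebase) might look like:
import time

import requests


def poll_jenkins_log(base, job_name, build_no, poll_interval=5):
    """Yield successive segments of a Jenkins build log until it completes."""
    url = '{base}/job/{job}/{build}/logText/progressiveText/'.format(
        base=base, job=job_name, build=build_no)
    offset = 0
    while True:
        resp = requests.get(url, params={'start': offset}, timeout=15)
        resp.raise_for_status()
        if resp.text:
            yield resp.text
        # Advance to the server-reported size rather than offset + len(body);
        # Jenkins embeds encoded links, so byte counts aren't predictable.
        offset = int(resp.headers['X-Text-Size'])
        if resp.headers.get('X-More-Data') != 'true':
            break
        time.sleep(poll_interval)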
def _sync_artifact_as_log(self, artifact):
    jobstep = artifact.step
    job = artifact.job

    logsource, created = get_or_create(LogSource, where={
        'name': artifact.data['displayPath'],
        'job': job,
        'step': jobstep,
    }, defaults={
        'job': job,
        'project': job.project,
        'date_created': job.date_started,
    })

    url = '{base}/job/{job}/{build}/artifact/{artifact}'.format(
        base=self.base_url,
        job=jobstep.data['job_name'],
        build=jobstep.data['build_no'],
        artifact=artifact.data['relativePath'],
    )

    offset = 0
    session = requests.Session()
    with closing(session.get(url, stream=True, timeout=15)) as resp:
        iterator = resp.iter_content()
        for chunk in chunked(iterator, LOG_CHUNK_SIZE):
            chunk_size = len(chunk)
            chunk, _ = create_or_update(LogChunk, where={
                'source': logsource,
                'offset': offset,
            }, values={
                'job': job,
                'project': job.project,
                'size': chunk_size,
                'text': chunk,
            })
            offset += chunk_size
def _sync_artifact_as_log(self, artifact):
    jobstep = artifact.step
    job = artifact.job

    logsource, created = get_or_create(
        LogSource,
        where={"name": artifact.data["displayPath"], "job": job, "step": jobstep},
        defaults={"job": job, "project": job.project, "date_created": job.date_started},
    )

    offset = 0
    with closing(self.fetch_artifact(jobstep, artifact.data)) as resp:
        iterator = resp.iter_content()
        for chunk in chunked(iterator, LOG_CHUNK_SIZE):
            chunk_size = len(chunk)
            chunk, _ = create_or_update(
                LogChunk,
                where={"source": logsource, "offset": offset},
                values={"job": job, "project": job.project, "size": chunk_size, "text": chunk},
            )
            offset += chunk_size
    db.session.commit()
def _sync_artifact_as_log(self, artifact):
    jobstep = artifact.step
    job = artifact.job

    logsource, created = get_or_create(LogSource, where={
        'name': artifact.data['displayPath'],
        'job': job,
        'step': jobstep,
    }, defaults={
        'job': job,
        'project': job.project,
        'date_created': job.date_started,
    })

    url = '{base}/job/{job}/{build}/artifact/{artifact}'.format(
        base=jobstep.data['master'],
        job=jobstep.data['job_name'],
        build=jobstep.data['build_no'],
        artifact=artifact.data['relativePath'],
    )

    offset = 0
    session = self.http_session
    with closing(session.get(url, stream=True, timeout=15)) as resp:
        iterator = resp.iter_content()
        for chunk in chunked(iterator, LOG_CHUNK_SIZE):
            chunk_size = len(chunk)
            chunk, _ = create_or_update(LogChunk, where={
                'source': logsource,
                'offset': offset,
            }, values={
                'job': job,
                'project': job.project,
                'size': chunk_size,
                'text': chunk,
            })
            offset += chunk_size
def _sync_log(self, jobstep):
    bucket_name = self._get_artifactstore_bucket(jobstep)

    # Note: artifactstore may alter the log name to deduplicate it, so
    # always use data.get('log_artifact_name')
    artifact_name = jobstep.data.get('log_artifact_name')
    if not artifact_name:
        artifact_name = self.artifact_store_client \
            .create_chunked_artifact(bucket_name, artifact_name=JENKINS_LOG_NAME).name
        jobstep.data['log_artifact_name'] = artifact_name
        db.session.add(jobstep)
        db.session.commit()

    logsource, created = get_or_create(LogSource, where={
        'name': artifact_name,
        'step': jobstep,
    }, defaults={
        'job': jobstep.job,
        'project': jobstep.project,
        'date_created': jobstep.date_started,
        'in_artifact_store': True,
    })
    if created:
        offset = 0
    else:
        offset = jobstep.data.get('log_offset', 0)

    url = '{base}/job/{job}/{build}/logText/progressiveText/'.format(
        base=jobstep.data['master'],
        job=jobstep.data['job_name'],
        build=jobstep.data['build_no'],
    )

    start_time = time.time()

    with closing(self._streaming_get(url, params={'start': offset})) as resp:
        log_length = int(resp.headers['X-Text-Size'])

        # When you request an offset that doesn't exist in the build log,
        # Jenkins returns the entire log. Jenkins also provides X-Text-Size,
        # which indicates the total size of the log.
        if offset > log_length:
            return

        # Jenkins indicates there is more data when the job has yet to
        # complete.
        has_more = resp.headers.get('X-More-Data') == 'true'

        # XXX: requests doesn't guarantee chunk_size, so we enforce it
        # with our own helper.
        iterator = resp.iter_content()
        for chunk in chunked(iterator, LOG_CHUNK_SIZE):
            chunk_size = len(chunk)
            try:
                self.artifact_store_client.post_artifact_chunk(
                    bucket_name, artifact_name, offset, chunk)
                offset += chunk_size

                if time.time() > start_time + LOG_SYNC_TIMEOUT_SECS:
                    raise RuntimeError('TOO LONG TO DOWNLOAD LOG: %s' % logsource.get_url())
            except Exception as e:
                # On an exception or a timeout, attempt to truncate the log.
                # Catch all exceptions, including timeouts and HTTP errors.
                self.logger.warning('Exception when uploading logchunks: %s', e.message)
                has_more = False

                warning = ("\nLOG TRUNCATED. SEE FULL LOG AT "
                           "{base}/job/{job}/{build}/consoleText\n").format(
                    base=jobstep.data['master'],
                    job=jobstep.data['job_name'],
                    build=jobstep.data['build_no'])
                self.artifact_store_client.post_artifact_chunk(
                    bucket_name, artifact_name, offset, warning)
                break

    # We **must** track the log offset externally, as Jenkins embeds encoded
    # links and we can't accurately predict the next `start` param.
    jobstep.data['log_offset'] = log_length
    db.session.add(jobstep)

    if not has_more:
        self.artifact_store_client.close_chunked_artifact(bucket_name, artifact_name)

    return True if has_more else None
def _sync_log(self, jobstep, name, job_name, build_no):
    job = jobstep.job

    logsource, created = get_or_create(LogSource, where={
        'name': name,
        'step': jobstep,
    }, defaults={
        'job': job,
        'project': jobstep.project,
        'date_created': jobstep.date_started,
    })
    if created:
        offset = 0
    else:
        offset = jobstep.data.get('log_offset', 0)

    url = '{base}/job/{job}/{build}/logText/progressiveText/'.format(
        base=jobstep.data['master'],
        job=job_name,
        build=build_no,
    )

    start_time = time.time()

    with closing(self._streaming_get(url, params={'start': offset})) as resp:
        log_length = int(resp.headers['X-Text-Size'])

        # When you request an offset that doesn't exist in the build log,
        # Jenkins returns the entire log. Jenkins also provides X-Text-Size,
        # which indicates the total size of the log.
        if offset > log_length:
            return

        # XXX: requests doesn't guarantee chunk_size, so we enforce it
        # with our own helper.
        iterator = resp.iter_content()
        for chunk in chunked(iterator, LOG_CHUNK_SIZE):
            chunk_size = len(chunk)
            chunk, _ = create_or_update(LogChunk, where={
                'source': logsource,
                'offset': offset,
            }, values={
                'job': job,
                'project': job.project,
                'size': chunk_size,
                'text': chunk,
            })
            offset += chunk_size

            if time.time() > start_time + LOG_SYNC_TIMEOUT_SECS:
                warning = ("\nTRUNCATED LOG: TOOK TOO LONG TO DOWNLOAD FROM JENKINS. SEE FULL LOG AT "
                           "{base}/job/{job}/{build}/consoleText\n").format(
                    base=jobstep.data['master'], job=job_name, build=build_no)
                create_or_update(LogChunk, where={
                    'source': logsource,
                    'offset': offset,
                }, values={
                    'job': job,
                    'project': job.project,
                    'size': len(warning),
                    'text': warning,
                })
                # Advance by the warning's size, not the last chunk's.
                offset += len(warning)
                self.logger.warning('log download took too long: %s', logsource.get_url())
                break

        # Jenkins indicates there is more data when the job has yet to
        # complete.
        has_more = resp.headers.get('X-More-Data') == 'true'

    # We **must** track the log offset externally, as Jenkins embeds encoded
    # links and we can't accurately predict the next `start` param.
    jobstep.data['log_offset'] = log_length
    db.session.add(jobstep)

    return True if has_more else None
def post(self, step_id):
    """
    Create a new LogSource or append to an existing source (by name)
    a given set of chunks.

    Very basic soft checking is done to see if a chunk is already present
    in the database. Of note, it's not guaranteed to be correct, as another
    commit could be in progress.
    """
    step = JobStep.query.get(step_id)
    if step is None:
        return '', 404

    args = self.parser.parse_args()

    logsource, _ = get_or_create(LogSource, where={
        'step_id': step.id,
        'name': args.source,
    }, defaults={
        'project_id': step.project_id,
        'job_id': step.job_id,
    })

    offset = args.offset
    if offset is not None:
        # ensure we haven't already recorded an offset that could be
        # in this range
        existing_chunk = LogChunk.query.filter(
            LogChunk.source_id == logsource.id,
            offset >= LogChunk.offset,
            offset <= LogChunk.offset + LogChunk.size - 1,
        ).first()
        if existing_chunk is not None:
            # XXX(dcramer): this is more of an error, but we assume it
            # happens because the chunk was already sent
            existing_msg = {
                "error": "A chunk within the bounds of the given offset is already recorded.",
            }
            return self.respond(existing_msg, status_code=204)
    else:
        offset = db.session.query(
            LogChunk.offset + LogChunk.size,
        ).filter(
            LogChunk.source_id == logsource.id,
        ).order_by(
            LogChunk.offset.desc(),
        ).limit(1).scalar() or 0

    logchunks = []
    for chunk in chunked(args.text, LOG_CHUNK_SIZE):
        chunk_size = len(chunk)
        chunk, _ = create_or_update(LogChunk, where={
            'source': logsource,
            'offset': offset,
        }, values={
            'job': step.job,
            'project': step.project,
            'size': chunk_size,
            'text': chunk,
        })
        offset += chunk_size
        logchunks.append(chunk)

    context = self.serialize({
        'source': logsource,
        'chunks': [{
            'id': c.id,
            'offset': c.offset,
            'size': c.size,
        } for c in logchunks],
    })

    return self.respond(context, serialize=False)
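# Usage sketch for the endpoint above: a client appends log text by POSTing
# `source`, an optional `offset`, and `text` (field names inferred from
# args.source/args.offset/args.text; the URL below is hypothetical, since
# the actual route mapping isn't shown in this section).
import requests

resp = requests.post(
    'https://changes.example.com/api/0/jobsteps/{step_id}/logappend/'.format(
        step_id='f' * 32),
    data={
        'source': 'console',        # LogSource name to create or append to
        'offset': 0,                # omit to append at the current end
        'text': 'build started\n',
    },
)
resp.raise_for_status()
# On success, the response lists the chunks that were written.
print(resp.json()['chunks'])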