Example #1
    def test_simple(self):
        foo = 'aaa\naaa\naaa\n'

        result = list(chunked(foo, 5))
        assert len(result) == 3
        assert result[0] == 'aaa\n'
        assert result[1] == 'aaa\n'
        assert result[2] == 'aaa\n'

        result = list(chunked(foo, 8))

        assert len(result) == 2
        assert result[0] == 'aaa\naaa\n'
        assert result[1] == 'aaa\n'

        result = list(chunked(foo, 4))

        assert len(result) == 3
        assert result[0] == 'aaa\n'
        assert result[1] == 'aaa\n'
        assert result[2] == 'aaa\n'

        foo = 'a' * 10

        result = list(chunked(foo, 2))
        assert len(result) == 5
        assert all(r == 'aa' for r in result)

        foo = 'aaaa\naaaa'

        result = list(chunked(foo, 3))
        assert len(result) == 4
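The assertions above pin down chunked's contract: it consumes a string (or any iterable of strings), emits chunks of at most chunk_size characters, prefers to break just after a newline inside the window, and falls back to a hard split when a single line is longer than the window. A minimal sketch consistent with every assertion in this test (the helper these examples actually import may differ in detail):

def chunked(iterator, chunk_size):
    # Buffer the input and emit chunks of at most chunk_size characters,
    # breaking just after the last newline in the window when one exists.
    result = ''
    for piece in iterator:
        result += piece
        while len(result) >= chunk_size:
            newline_pos = result.rfind('\n', 0, chunk_size)
            if newline_pos == -1:
                # No newline in the window: hard split at chunk_size.
                newline_pos = chunk_size
            else:
                # Keep the newline with the chunk being emitted.
                newline_pos += 1
            yield result[:newline_pos]
            result = result[newline_pos:]
    if result:
        yield result

Under this sketch, list(chunked('aaa\naaa\naaa\n', 5)) yields ['aaa\n', 'aaa\n', 'aaa\n'] and list(chunked('aaaa\naaaa', 3)) yields four chunks, matching the first and last assertions.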
Example #2
    def _sync_artifact_as_log(self, artifact):
        jobstep = artifact.step
        job = artifact.job

        logsource, created = get_or_create(LogSource, where={
            'name': artifact.data['displayPath'],
            'job': job,
            'step': jobstep,
        }, defaults={
            'job': job,
            'project': job.project,
            'date_created': job.date_started,
        })

        offset = 0
        with closing(self.fetch_artifact(jobstep, artifact.data)) as resp:
            iterator = resp.iter_content()
            for chunk in chunked(iterator, LOG_CHUNK_SIZE):
                chunk_size = len(chunk)
                chunk, _ = create_or_update(LogChunk, where={
                    'source': logsource,
                    'offset': offset,
                }, values={
                    'job': job,
                    'project': job.project,
                    'size': chunk_size,
                    'text': chunk,
                })
                offset += chunk_size
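Examples #2 and #3 onward lean on two helpers, get_or_create(Model, where=..., defaults=...) and create_or_update(Model, where=..., values=...), each returning an (instance, flag) pair. Their implementations are not shown on this page; a minimal Flask-SQLAlchemy-style sketch of the get_or_create contract, assuming the same global db handle used elsewhere on this page, might look like:

def get_or_create(model, where, defaults=None):
    # Look up a row matching `where`; if none exists, create one from
    # `where` merged with `defaults`. Returns (instance, created).
    instance = model.query.filter_by(**where).first()
    if instance is not None:
        return instance, False
    kwargs = dict(where)
    kwargs.update(defaults or {})
    instance = model(**kwargs)
    db.session.add(instance)  # `db` is the app's database handle (assumed)
    return instance, True

create_or_update presumably has the same shape but applies values to the row even when it already exists; the examples rely on that to make re-writing a chunk at the same offset idempotent.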
Example #3
    def _sync_artifact_as_log(self, artifact):
        jobstep = artifact.step
        job = artifact.job

        logsource, created = get_or_create(LogSource,
                                           where={
                                               'name': artifact.data['displayPath'],
                                               'job': job,
                                               'step': jobstep,
                                           },
                                           defaults={
                                               'job': job,
                                               'project': job.project,
                                               'date_created': job.date_started,
                                           })

        offset = 0
        with closing(self.fetch_artifact(jobstep, artifact.data)) as resp:
            iterator = resp.iter_content()
            for chunk in chunked(iterator, LOG_CHUNK_SIZE):
                chunk_size = len(chunk)
                chunk, _ = create_or_update(LogChunk,
                                            where={
                                                'source': logsource,
                                                'offset': offset,
                                            },
                                            values={
                                                'job': job,
                                                'project': job.project,
                                                'size': chunk_size,
                                                'text': chunk,
                                            })
                offset += chunk_size
Example #4
    def _sync_log(self, jobstep, name, job_name, build_no):
        job = jobstep.job
        logsource, created = get_or_create(LogSource, where={
            'name': name,
            'step': jobstep,
        }, defaults={
            'job': job,
            'project': jobstep.project,
            'date_created': jobstep.date_started,
        })
        if created:
            offset = 0
        else:
            offset = jobstep.data.get('log_offset', 0)

        url = '{base}/job/{job}/{build}/logText/progressiveText/'.format(
            base=jobstep.data['master'],
            job=job_name,
            build=build_no,
        )

        session = self.http_session
        with closing(session.get(url, params={'start': offset}, stream=True, timeout=15)) as resp:
            log_length = int(resp.headers['X-Text-Size'])

            # When you request an offset that doesn't exist in the build log, Jenkins
            # will instead return the entire log. Jenkins also seems to provide us
            # with X-Text-Size which indicates the total size of the log
            if offset > log_length:
                return

            # XXX: requests doesn't seem to guarantee chunk_size, so we force it
            # with our own helper
            iterator = resp.iter_content()
            for chunk in chunked(iterator, LOG_CHUNK_SIZE):
                chunk_size = len(chunk)
                chunk, _ = create_or_update(LogChunk, where={
                    'source': logsource,
                    'offset': offset,
                }, values={
                    'job': job,
                    'project': job.project,
                    'size': chunk_size,
                    'text': chunk,
                })
                offset += chunk_size

            # Jenkins will suggest to us that there is more data when the job has
            # yet to complete
            has_more = resp.headers.get('X-More-Data') == 'true'

        # We **must** track the log offset externally as Jenkins embeds encoded
        # links and we can't accurately predict the next `start` param.
        jobstep.data['log_offset'] = log_length
        db.session.add(jobstep)

        return True if has_more else None
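Example #4 drives Jenkins' progressive-text API: each request passes a start offset, the X-Text-Size response header reports the total log length so far (which becomes the next offset), and X-More-Data: true means the build is still producing output. Stripped of the database bookkeeping, the polling loop looks roughly like this (the function name and poll interval are illustrative choices, not part of the example above):

import sys
import time

import requests


def poll_jenkins_log(base, job_name, build_no, interval=5):
    # Tail a running build's console output via progressiveText.
    url = '{base}/job/{job}/{build}/logText/progressiveText/'.format(
        base=base, job=job_name, build=build_no)
    offset = 0
    while True:
        resp = requests.get(url, params={'start': offset}, timeout=15)
        resp.raise_for_status()
        sys.stdout.write(resp.text)
        # X-Text-Size is the server-side log length; use it as the next
        # start offset rather than counting bytes locally.
        offset = int(resp.headers['X-Text-Size'])
        if resp.headers.get('X-More-Data') != 'true':
            break
        time.sleep(interval)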
Example #5
    def _sync_artifact_as_log(self, artifact):
        jobstep = artifact.step
        job = artifact.job

        logsource, created = get_or_create(LogSource,
                                           where={
                                               'name': artifact.data['displayPath'],
                                               'job': job,
                                               'step': jobstep,
                                           },
                                           defaults={
                                               'job': job,
                                               'project': job.project,
                                               'date_created': job.date_started,
                                           })

        url = '{base}/job/{job}/{build}/artifact/{artifact}'.format(
            base=self.base_url,
            job=jobstep.data['job_name'],
            build=jobstep.data['build_no'],
            artifact=artifact.data['relativePath'],
        )

        offset = 0
        session = requests.Session()
        with closing(session.get(url, stream=True, timeout=15)) as resp:
            iterator = resp.iter_content()
            for chunk in chunked(iterator, LOG_CHUNK_SIZE):
                chunk_size = len(chunk)
                chunk, _ = create_or_update(LogChunk,
                                            where={
                                                'source': logsource,
                                                'offset': offset,
                                            },
                                            values={
                                                'job': job,
                                                'project': job.project,
                                                'size': chunk_size,
                                                'text': chunk,
                                            })
                offset += chunk_size
Example #6
    def _sync_artifact_as_log(self, artifact):
        jobstep = artifact.step
        job = artifact.job

        logsource, created = get_or_create(
            LogSource,
            where={"name": artifact.data["displayPath"], "job": job, "step": jobstep},
            defaults={"job": job, "project": job.project, "date_created": job.date_started},
        )

        offset = 0
        with closing(self.fetch_artifact(jobstep, artifact.data)) as resp:
            iterator = resp.iter_content()
            for chunk in chunked(iterator, LOG_CHUNK_SIZE):
                chunk_size = len(chunk)
                chunk, _ = create_or_update(
                    LogChunk,
                    where={"source": logsource, "offset": offset},
                    values={"job": job, "project": job.project, "size": chunk_size, "text": chunk},
                )
                offset += chunk_size

        db.session.commit()
Example #7
    def _sync_artifact_as_log(self, artifact):
        jobstep = artifact.step
        job = artifact.job

        logsource, created = get_or_create(LogSource, where={
            'name': artifact.data['displayPath'],
            'job': job,
            'step': jobstep,
        }, defaults={
            'job': job,
            'project': job.project,
            'date_created': job.date_started,
        })

        url = '{base}/job/{job}/{build}/artifact/{artifact}'.format(
            base=jobstep.data['master'],
            job=jobstep.data['job_name'],
            build=jobstep.data['build_no'],
            artifact=artifact.data['relativePath'],
        )

        offset = 0
        session = self.http_session
        with closing(session.get(url, stream=True, timeout=15)) as resp:
            iterator = resp.iter_content()
            for chunk in chunked(iterator, LOG_CHUNK_SIZE):
                chunk_size = len(chunk)
                chunk, _ = create_or_update(LogChunk, where={
                    'source': logsource,
                    'offset': offset,
                }, values={
                    'job': job,
                    'project': job.project,
                    'size': chunk_size,
                    'text': chunk,
                })
                offset += chunk_size
Example #8
    def _sync_log(self, jobstep):
        bucket_name = self._get_artifactstore_bucket(jobstep)

        # Note: artifactstore may alter the log name to deduplicate it, so always use data.get('log_artifact_name')
        artifact_name = jobstep.data.get('log_artifact_name')
        if not artifact_name:
            artifact_name = self.artifact_store_client\
                .create_chunked_artifact(bucket_name, artifact_name=JENKINS_LOG_NAME).name
            jobstep.data['log_artifact_name'] = artifact_name
            db.session.add(jobstep)
            db.session.commit()

        logsource, created = get_or_create(LogSource, where={
            'name': artifact_name,
            'step': jobstep,
        }, defaults={
            'job': jobstep.job,
            'project': jobstep.project,
            'date_created': jobstep.date_started,
            'in_artifact_store': True,
        })
        if created:
            offset = 0
        else:
            offset = jobstep.data.get('log_offset', 0)

        url = '{base}/job/{job}/{build}/logText/progressiveText/'.format(
            base=jobstep.data['master'],
            job=jobstep.data['job_name'],
            build=jobstep.data['build_no'],
        )

        start_time = time.time()

        with closing(self._streaming_get(url, params={'start': offset})) as resp:
            log_length = int(resp.headers['X-Text-Size'])

            # When you request an offset that doesn't exist in the build log, Jenkins
            # will instead return the entire log. Jenkins also seems to provide us
            # with X-Text-Size which indicates the total size of the log
            if offset > log_length:
                return

            # Jenkins will suggest to us that there is more data when the job has
            # yet to complete
            has_more = resp.headers.get('X-More-Data') == 'true'

            # XXX: requests doesn't seem to guarantee chunk_size, so we force it
            # with our own helper
            iterator = resp.iter_content()
            for chunk in chunked(iterator, LOG_CHUNK_SIZE):
                chunk_size = len(chunk)
                try:
                    self.artifact_store_client.post_artifact_chunk(bucket_name, artifact_name, offset, chunk)
                    offset += chunk_size

                    if time.time() > start_time + LOG_SYNC_TIMEOUT_SECS:
                        raise RuntimeError('TOO LONG TO DOWNLOAD LOG: %s' % logsource.get_url())
                except Exception as e:
                    # On an exception or a timeout, attempt to truncate the log
                    # Catch all exceptions, including timeouts and HTTP errors

                    self.logger.warning('Exception when uploading logchunks: %s', e.message)

                    has_more = False

                    warning = ("\nLOG TRUNCATED. SEE FULL LOG AT "
                               "{base}/job/{job}/{build}/consoleText\n").format(
                        base=jobstep.data['master'],
                        job=jobstep.data['job_name'],
                        build=jobstep.data['build_no'])
                    self.artifact_store_client.post_artifact_chunk(bucket_name, artifact_name, offset, warning)
                    break

        # We **must** track the log offset externally as Jenkins embeds encoded
        # links and we can't accurately predict the next `start` param.
        jobstep.data['log_offset'] = log_length
        db.session.add(jobstep)

        if not has_more:
            self.artifact_store_client.close_chunked_artifact(bucket_name, artifact_name)

        return True if has_more else None
Example #9
    def _sync_log(self, jobstep, name, job_name, build_no):
        job = jobstep.job
        logsource, created = get_or_create(LogSource, where={
            'name': name,
            'step': jobstep,
        }, defaults={
            'job': job,
            'project': jobstep.project,
            'date_created': jobstep.date_started,
        })
        if created:
            offset = 0
        else:
            offset = jobstep.data.get('log_offset', 0)

        url = '{base}/job/{job}/{build}/logText/progressiveText/'.format(
            base=jobstep.data['master'],
            job=job_name,
            build=build_no,
        )

        start_time = time.time()

        with closing(self._streaming_get(url, params={'start': offset})) as resp:
            log_length = int(resp.headers['X-Text-Size'])

            # When you request an offset that doesn't exist in the build log, Jenkins
            # will instead return the entire log. Jenkins also seems to provide us
            # with X-Text-Size which indicates the total size of the log
            if offset > log_length:
                return

            # XXX: requests doesn't seem to guarantee chunk_size, so we force it
            # with our own helper
            iterator = resp.iter_content()
            for chunk in chunked(iterator, LOG_CHUNK_SIZE):
                chunk_size = len(chunk)
                chunk, _ = create_or_update(LogChunk, where={
                    'source': logsource,
                    'offset': offset,
                }, values={
                    'job': job,
                    'project': job.project,
                    'size': chunk_size,
                    'text': chunk,
                })
                offset += chunk_size

                if time.time() > start_time + LOG_SYNC_TIMEOUT_SECS:
                    warning = ("\nTRUNCATED LOG: TOOK TOO LONG TO DOWNLOAD FROM JENKINS. SEE FULL LOG AT "
                               "{base}/job/{job}/{build}/consoleText\n").format(
                                   base=jobstep.data['master'],
                                   job=job_name,
                                   build=build_no)
                    create_or_update(LogChunk, where={
                        'source': logsource,
                        'offset': offset,
                    }, values={
                        'job': job,
                        'project': job.project,
                        'size': len(warning),
                        'text': warning,
                    })
                    offset += len(warning)
                    self.logger.warning('log download took too long: %s', logsource.get_url())
                    break

            # Jenkins will suggest to us that there is more data when the job has
            # yet to complete
            has_more = resp.headers.get('X-More-Data') == 'true'

        # We **must** track the log offset externally as Jenkins embeds encoded
        # links and we can't accurately predict the next `start` param.
        jobstep.data['log_offset'] = log_length
        db.session.add(jobstep)

        return True if has_more else None
Example #10
    def _sync_log(self, jobstep, name, job_name, build_no):
        job = jobstep.job
        logsource, created = get_or_create(LogSource,
                                           where={
                                               'name': name,
                                               'step': jobstep,
                                           },
                                           defaults={
                                               'job': job,
                                               'project': jobstep.project,
                                               'date_created': jobstep.date_started,
                                           })
        if created:
            offset = 0
        else:
            offset = jobstep.data.get('log_offset', 0)

        url = '{base}/job/{job}/{build}/logText/progressiveText/'.format(
            base=jobstep.data['master'],
            job=job_name,
            build=build_no,
        )

        start_time = time.time()

        with closing(self._streaming_get(url, params={'start': offset})) as resp:
            log_length = int(resp.headers['X-Text-Size'])

            # When you request an offset that doesn't exist in the build log, Jenkins
            # will instead return the entire log. Jenkins also seems to provide us
            # with X-Text-Size which indicates the total size of the log
            if offset > log_length:
                return

            # XXX: requests doesn't seem to guarantee chunk_size, so we force it
            # with our own helper
            iterator = resp.iter_content()
            for chunk in chunked(iterator, LOG_CHUNK_SIZE):
                chunk_size = len(chunk)
                chunk, _ = create_or_update(LogChunk,
                                            where={
                                                'source': logsource,
                                                'offset': offset,
                                            },
                                            values={
                                                'job': job,
                                                'project': job.project,
                                                'size': chunk_size,
                                                'text': chunk,
                                            })
                offset += chunk_size

                if time.time() > start_time + LOG_SYNC_TIMEOUT_SECS:
                    warning = (
                        "\nTRUNCATED LOG: TOOK TOO LONG TO DOWNLOAD FROM JENKINS. SEE FULL LOG AT "
                        "{base}/job/{job}/{build}/consoleText\n").format(
                            base=jobstep.data['master'],
                            job=job_name,
                            build=build_no)
                    create_or_update(LogChunk,
                                     where={
                                         'source': logsource,
                                         'offset': offset,
                                     },
                                     values={
                                         'job': job,
                                         'project': job.project,
                                         'size': len(warning),
                                         'text': warning,
                                     })
                    offset += len(warning)
                    self.logger.warning('log download took too long: %s',
                                        logsource.get_url())
                    break

            # Jenkins will suggest to us that there is more data when the job has
            # yet to complete
            has_more = resp.headers.get('X-More-Data') == 'true'

        # We **must** track the log offset externally as Jenkins embeds encoded
        # links and we can't accurately predict the next `start` param.
        jobstep.data['log_offset'] = log_length
        db.session.add(jobstep)

        return True if has_more else None
Example #11
    def _sync_log(self, jobstep):
        bucket_name = self._get_artifactstore_bucket(jobstep)

        # Note: artifactstore may alter the log name to deduplicate it, so always use data.get('log_artifact_name')
        artifact_name = jobstep.data.get('log_artifact_name')
        if not artifact_name:
            artifact_name = self.artifact_store_client\
                .create_chunked_artifact(bucket_name, artifact_name=JENKINS_LOG_NAME).name
            jobstep.data['log_artifact_name'] = artifact_name
            db.session.add(jobstep)
            db.session.commit()

        logsource, created = get_or_create(LogSource,
                                           where={
                                               'name': artifact_name,
                                               'step': jobstep,
                                           },
                                           defaults={
                                               'job': jobstep.job,
                                               'project': jobstep.project,
                                               'date_created': jobstep.date_started,
                                               'in_artifact_store': True,
                                           })
        if created:
            offset = 0
        else:
            offset = jobstep.data.get('log_offset', 0)

        url = '{base}/job/{job}/{build}/logText/progressiveText/'.format(
            base=jobstep.data['master'],
            job=jobstep.data['job_name'],
            build=jobstep.data['build_no'],
        )

        start_time = time.time()

        with closing(self._streaming_get(url, params={'start': offset})) as resp:
            log_length = int(resp.headers['X-Text-Size'])

            # When you request an offset that doesn't exist in the build log, Jenkins
            # will instead return the entire log. Jenkins also seems to provide us
            # with X-Text-Size which indicates the total size of the log
            if offset > log_length:
                return

            # Jenkins will suggest to us that there is more data when the job has
            # yet to complete
            has_more = resp.headers.get('X-More-Data') == 'true'

            # XXX: requests doesn't seem to guarantee chunk_size, so we force it
            # with our own helper
            iterator = resp.iter_content()
            for chunk in chunked(iterator, LOG_CHUNK_SIZE):
                chunk_size = len(chunk)
                try:
                    self.artifact_store_client.post_artifact_chunk(
                        bucket_name, artifact_name, offset, chunk)
                    offset += chunk_size

                    if time.time() > start_time + LOG_SYNC_TIMEOUT_SECS:
                        raise RuntimeError('TOO LONG TO DOWNLOAD LOG: %s' %
                                           logsource.get_url())
                except Exception as e:
                    # On an exception or a timeout, attempt to truncate the log
                    # Catch all exceptions, including timeouts and HTTP errors

                    self.logger.warning(
                        'Exception when uploading logchunks: %s', e.message)

                    has_more = False

                    warning = (
                        "\nLOG TRUNCATED. SEE FULL LOG AT "
                        "{base}/job/{job}/{build}/consoleText\n").format(
                            base=jobstep.data['master'],
                            job=jobstep.data['job_name'],
                            build=jobstep.data['build_no'])
                    self.artifact_store_client.post_artifact_chunk(
                        bucket_name, artifact_name, offset, warning)
                    break

        # We **must** track the log offset externally as Jenkins embeds encoded
        # links and we can't accurately predict the next `start` param.
        jobstep.data['log_offset'] = log_length
        db.session.add(jobstep)

        if not has_more:
            self.artifact_store_client.close_chunked_artifact(
                bucket_name, artifact_name)

        return True if has_more else None
Example #12
    def post(self, step_id):
        """
        Create a new LogSource or append to an existing source (by name)
        a given set of chunks.

        Very basic soft checking is done to see if a chunk is already present
        in the database. Of note, it's not guaranteed to be correct as another
        commit could be in progress.
        """
        step = JobStep.query.get(step_id)
        if step is None:
            return '', 404

        args = self.parser.parse_args()

        logsource, _ = get_or_create(LogSource,
                                     where={
                                         'step_id': step.id,
                                         'name': args.source,
                                     },
                                     defaults={
                                         'project_id': step.project_id,
                                         'job_id': step.job_id,
                                     })

        offset = args.offset
        if offset is not None:
            # ensure we haven't already recorded an offset that could be
            # in this range
            existing_chunk = LogChunk.query.filter(
                LogChunk.source_id == logsource.id,
                offset >= LogChunk.offset,
                offset <= LogChunk.offset + LogChunk.size - 1,
            ).first()
            if existing_chunk is not None:
                # XXX(dcramer): this is more of an error but we make an assumption
                # that this happens because it was already sent
                existing_msg = {
                    "error": "A chunk within the bounds of the given offset is already recorded."
                }
                return self.respond(existing_msg, status_code=204)
        else:
            offset = db.session.query(
                LogChunk.offset + LogChunk.size,
            ).filter(
                LogChunk.source_id == logsource.id,
            ).order_by(
                LogChunk.offset.desc(),
            ).limit(1).scalar() or 0

        logchunks = []
        for chunk in chunked(args.text, LOG_CHUNK_SIZE):
            chunk_size = len(chunk)
            chunk, _ = create_or_update(LogChunk,
                                        where={
                                            'source': logsource,
                                            'offset': offset,
                                        },
                                        values={
                                            'job': step.job,
                                            'project': step.project,
                                            'size': chunk_size,
                                            'text': chunk,
                                        })
            offset += chunk_size
            logchunks.append(chunk)

        context = self.serialize({
            'source': logsource,
            'chunks': [{
                'id': c.id,
                'offset': c.offset,
                'size': c.size,
            } for c in logchunks]
        })

        return self.respond(context, serialize=False)
Example #13
    def post(self, step_id):
        """
        Create a new LogSource or append to an existing source (by name)
        a given set of chunks.

        Very basic soft checking is done to see if a chunk is already present
        in the database. Of note, it's not guaranteed to be correct as another
        commit could be in progress.
        """
        step = JobStep.query.get(step_id)
        if step is None:
            return '', 404

        args = self.parser.parse_args()

        logsource, _ = get_or_create(LogSource, where={
            'step_id': step.id,
            'name': args.source,
        }, defaults={
            'project_id': step.project_id,
            'job_id': step.job_id,
        })

        offset = args.offset
        if offset is not None:
            # ensure we haven't already recorded an offset that could be
            # in this range
            existing_chunk = LogChunk.query.filter(
                LogChunk.source_id == logsource.id,
                offset >= LogChunk.offset,
                offset <= LogChunk.offset + LogChunk.size - 1,
            ).first()
            if existing_chunk is not None:
                # XXX(dcramer): this is more of an error but we make an assumption
                # that this happens because it was already sent
                existing_msg = {"error": "A chunk within the bounds of the given offset is already recorded."}
                return self.respond(existing_msg, status_code=204)
        else:
            offset = db.session.query(
                LogChunk.offset + LogChunk.size,
            ).filter(
                LogChunk.source_id == logsource.id,
            ).order_by(
                LogChunk.offset.desc(),
            ).limit(1).scalar() or 0

        logchunks = []
        for chunk in chunked(args.text, LOG_CHUNK_SIZE):
            chunk_size = len(chunk)
            chunk, _ = create_or_update(LogChunk, where={
                'source': logsource,
                'offset': offset,
            }, values={
                'job': step.job,
                'project': step.project,
                'size': chunk_size,
                'text': chunk,
            })
            offset += chunk_size
            logchunks.append(chunk)

        context = self.serialize({
            'source': logsource,
            'chunks': [{
                'id': c.id,
                'offset': c.offset,
                'size': c.size,
            } for c in logchunks]
        })

        return self.respond(context, serialize=False)
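A caller of this endpoint submits source, text, and optionally offset as request fields and gets back the serialized source plus the chunk ranges that were written. As a hypothetical client sketch (the mount point used below is an assumption; the actual route registration is not part of this example):

import requests


def append_log(base_url, step_id, text, source='console', offset=None):
    # Hypothetical client; '/api/0/jobsteps/{id}/logappend/' is an assumed
    # mount point for the handler shown above.
    data = {'source': source, 'text': text}
    if offset is not None:
        data['offset'] = offset
    resp = requests.post(
        '{0}/api/0/jobsteps/{1}/logappend/'.format(base_url, step_id),
        data=data, timeout=15)
    resp.raise_for_status()
    return resp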