def replace_astral(log_list):
    """Yield each log item with astral-plane characters filtered out of the
    known free-text fields (test/subtest/message/stack and stackwalk output).

    Items are mutated in place and then yielded, so this works as a streaming
    pass over a log iterable.
    """
    text_fields = ("test", "subtest", "message", "stack",
                   "stackwalk_stdout", "stackwalk_stderr")
    for item in log_list:
        for field in text_fields:
            if field in item:
                item[field] = astral_filter(item[field])
        yield item
def replace_astral(log_list):
    """Generator that sanitizes astral characters in selected string fields
    of every entry of ``log_list``, yielding the (mutated) entries."""
    FILTERED_KEYS = ["test", "subtest", "message", "stack",
                     "stackwalk_stdout", "stackwalk_stderr"]
    for entry in log_list:
        # only touch the keys that are actually present on this entry
        for key in (k for k in FILTERED_KEYS if k in entry):
            entry[key] = astral_filter(entry[key])
        yield entry
def store_text_log_summary(self, job, text_log_summary_artifact):
    """
    Store the contents of the text log summary artifact

    Upserts one TextLogStep row per step (keyed on job + line-number span)
    and one TextLogError per error line, then triggers bug-suggestion
    generation for the job.
    """
    step_data = json.loads(text_log_summary_artifact['blob'])['step_data']
    result_map = dict((v, k) for (k, v) in TextLogStep.RESULTS)
    name_max_length = TextLogStep._meta.get_field('name').max_length

    for step in step_data['steps']:
        defaults = {
            'name': step['name'][:name_max_length],
            'result': result_map[step['result']],
        }
        # process start/end times if we have them
        # we currently don't support timezones in treeherder, so
        # just ignore that when importing/updating the bug to avoid
        # a ValueError (though by default the text log summaries
        # we produce should have time expressed in UTC anyway)
        for tkey in ('started', 'finished'):
            if step.get(tkey):
                defaults[tkey] = dateutil.parser.parse(step[tkey],
                                                       ignoretz=True)

        log_step, _ = TextLogStep.objects.update_or_create(
            job=job,
            started_line_number=step['started_linenumber'],
            finished_line_number=step['finished_linenumber'],
            defaults=defaults)

        for error in (step.get('errors') or []):
            TextLogError.objects.update_or_create(
                step=log_step,
                line_number=error['linenumber'],
                defaults={'line': astral_filter(error['line'])})

    # create a set of bug suggestions immediately
    load_error_summary(job.repository.name, job.id)
def _get_unicode_string(author):
    """Return ``author`` as a unicode string with astral-plane characters
    filtered out, falling back to a latin-1 decode when utf-8 coercion fails.
    """
    try:
        return astral_filter(smart_text(author))
    except DjangoUnicodeDecodeError:
        # in certain annoying cases it seems like a field that should be
        # utf-8 is specified in extended ascii (pretty sure this is a mysql
        # issue, since mercurial/github should store/return in utf-8), so
        # do a conversion
        return author.decode('iso-8859-1')
def _get_unicode_string(author):
    """Coerce ``author`` to unicode and strip astral-plane characters,
    decoding as latin-1 when it is not valid utf-8."""
    try:
        text = smart_text(author)
    except DjangoUnicodeDecodeError:
        # Occasionally a field that ought to be utf-8 arrives in extended
        # ascii (almost certainly a mysql quirk, since mercurial/github
        # should store/return utf-8), so convert it explicitly.
        return author.decode('iso-8859-1')
    return astral_filter(text)
def store_text_log_summary_artifact(job, text_log_summary_artifact):
    """
    Store the contents of the text log summary artifact

    Creates a TextLogError row per error line (deduplicating on
    job + line number + filtered line text), then warms the error
    summary cache for the job.
    """
    error_lines = json.loads(text_log_summary_artifact['blob'])['errors']

    with transaction.atomic():
        for err in error_lines:
            _, was_created = TextLogError.objects.get_or_create(
                job=job,
                line_number=err['linenumber'],
                line=astral_filter(err['line']),
            )
            if not was_created:
                logger.warning('duplicate error lines processed for job %s',
                               job.id)

    # get error summary immediately (to warm the cache)
    error_summary.get_error_summary(job)
def store_text_log_summary_artifact(job, text_log_summary_artifact):
    """
    Store the contents of the text log summary artifact

    Creates one TextLogStep per parsed step and one TextLogError per
    error line, all inside a single transaction, then warms the error
    summary cache for the job.
    """
    blob = json.loads(text_log_summary_artifact['blob'])
    result_map = {result: code for (code, result) in TextLogStep.RESULTS}
    max_name_len = TextLogStep._meta.get_field('name').max_length

    with transaction.atomic():
        for step in blob['step_data']['steps']:
            # process start/end times if we have them
            # we currently don't support timezones in treeherder, so
            # just ignore that when importing/updating the bug to avoid
            # a ValueError (though by default the text log summaries
            # we produce should have time expressed in UTC anyway)
            times = {}
            for key in ('started', 'finished'):
                raw = step.get(key)
                if raw:
                    times[key] = dateutil.parser.parse(raw, ignoretz=True)

            log_step = TextLogStep.objects.create(
                job=job,
                started_line_number=step['started_linenumber'],
                finished_line_number=step['finished_linenumber'],
                name=step['name'][:max_name_len],
                result=result_map[step['result']],
                **times
            )

            for error in (step.get('errors') or []):
                TextLogError.objects.create(
                    job=job,
                    step=log_step,
                    line_number=error['linenumber'],
                    line=astral_filter(error['line']),
                )

    # get error summary immediately (to warm the cache)
    error_summary.get_error_summary(job)
def store_text_log_summary_artifact(job, text_log_summary_artifact):
    """
    Store the contents of the text log summary artifact

    Persists each parsed step as a TextLogStep (with its error lines as
    TextLogError rows) within one transaction, then warms the error
    summary cache.
    """
    decoded = json.loads(text_log_summary_artifact['blob'])
    step_data = decoded['step_data']
    outcome_by_name = {v: k for (k, v) in TextLogStep.RESULTS}
    name_limit = TextLogStep._meta.get_field('name').max_length

    with transaction.atomic():
        for step in step_data['steps']:
            truncated_name = step['name'][:name_limit]

            # process start/end times if we have them
            # we currently don't support timezones in treeherder, so
            # just ignore that when importing/updating the bug to avoid
            # a ValueError (though by default the text log summaries
            # we produce should have time expressed in UTC anyway)
            time_kwargs = {}
            for tkey in ('started', 'finished'):
                if step.get(tkey):
                    time_kwargs[tkey] = dateutil.parser.parse(
                        step[tkey], ignoretz=True)

            log_step = TextLogStep.objects.create(
                job=job,
                started_line_number=step['started_linenumber'],
                finished_line_number=step['finished_linenumber'],
                name=truncated_name,
                result=outcome_by_name[step['result']],
                **time_kwargs)

            for error in step.get('errors', []):
                TextLogError.objects.create(
                    step=log_step,
                    line_number=error['linenumber'],
                    line=astral_filter(error['line']))

    # get error summary immediately (to warm the cache)
    error_summary.get_error_summary(job)
def test_astra_filter_hex_value():
    """check the expected outcome is also not changed"""
    sample = '\U00000048\U00000049'
    assert astral_filter(sample) == sample
def test_astra_filter_emoji():
    # an astral-plane emoji should be replaced by its codepoint marker
    expected = '<U+01F346>'
    assert astral_filter(u'🍆') == expected
def test_astra_filter_hex_value():
    """check the expected outcome is also not changed"""
    unchanged = '\U00000048\U00000049'
    filtered = astral_filter(unchanged)
    assert filtered == unchanged
def test_astra_filter_emoji():
    # the filter replaces astral characters with a codepoint placeholder
    result = astral_filter(u'🍆')
    assert result == '<U+01F346>'
def handle(self, *args, **options):
    """Migrate datasource text_log_summary artifacts into TextLogStep /
    TextLogError rows, batch by batch, for the selected projects.

    Fixes applied in review: the two Python-2-only ``print`` statements
    were replaced with ``self.stdout.write`` — the output channel already
    used everywhere else in this command (and required for Python 3
    compatibility) — and literal ``set([...])``/``dict(...)`` constructions
    were rewritten as comprehensions.
    """
    if options['project']:
        projects = options['project']
    else:
        projects = Datasource.objects.values_list('project', flat=True)

    for ds in Datasource.objects.filter(project__in=projects):
        # progress marker: one line per datasource being migrated
        self.stdout.write(ds.project)
        try:
            repository = Repository.objects.get(name=ds.project)
        except Repository.DoesNotExist:
            self.stderr.write('No repository for datasource project {}, skipping'.format(
                ds.project))
            continue

        # we do the migration in two passes: first we migrate the text log
        # steps, then the errors
        db_options = settings.DATABASES['default'].get('OPTIONS', {})
        db = MySQLdb.connect(
            host=settings.DATABASES['default']['HOST'],
            db=ds.name,
            user=settings.DATABASES['default']['USER'],
            passwd=settings.DATABASES['default'].get('PASSWORD') or '',
            **db_options
        )
        c = db.cursor()
        offset = 0
        limit = options['batch_size']
        result_map = {v: k for (k, v) in TextLogStep.RESULTS}

        while True:
            job_id_pairs = Job.objects.filter(
                id__gt=offset,
                repository=repository).values_list(
                    'id', 'project_specific_id')[:limit]
            if len(job_id_pairs) == 0:
                break

            ds_job_ids = {pair[1] for pair in job_id_pairs}
            # filter out those job ids for which we already have
            # generated job details
            ds_job_ids -= set(TextLogStep.objects.filter(
                job__repository=repository,
                job__project_specific_id__in=ds_job_ids).values_list(
                    'job__project_specific_id', flat=True))

            start = time.time()
            if ds_job_ids:
                job_id_mapping = {project_specific_id: job_id
                                  for (job_id, project_specific_id)
                                  in job_id_pairs}
                c.execute("""SELECT job_id, `blob` from job_artifact where `name` = 'text_log_summary' and job_id in ({})""".format(",".join([str(job_id) for job_id in ds_job_ids])))
                text_log_steps = []
                text_log_errors = []

                def unwrap(row):
                    # decompress one artifact row and normalize its steps
                    # (truncate name, parse timestamps ignoring tz info)
                    steps = json.loads(zlib.decompress(row[1]))['step_data']['steps']
                    for step in steps:
                        step['name'] = step['name'][:TextLogStep._meta.get_field('name').max_length]
                        for tkey in ['started', 'finished']:
                            if step.get(tkey):
                                step[tkey] = dateutil.parser.parse(
                                    step[tkey], ignoretz=True)
                    return (row[0], steps)

                with transaction.atomic():
                    with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
                        for (ds_job_id, steps) in executor.map(unwrap, c.fetchall()):
                            for step in steps:
                                if step.get('errors'):
                                    # steps with errors must be created
                                    # eagerly so the error rows can refer
                                    # to a saved step
                                    text_log_step = TextLogStep.objects.create(
                                        name=step['name'],
                                        result=result_map[step['result']],
                                        job_id=job_id_mapping[ds_job_id],
                                        started_line_number=step['started_linenumber'],
                                        finished_line_number=step['finished_linenumber'],
                                        started=step.get('started'),
                                        finished=step.get('finished'))
                                    lines_covered = set()
                                    for error in step['errors']:
                                        line_number = error['linenumber']
                                        if line_number not in lines_covered:
                                            text_log_errors.append(TextLogError(
                                                line_number=line_number,
                                                step=text_log_step,
                                                line=astral_filter(error['line'])))
                                            lines_covered.add(line_number)
                                else:
                                    # error-free steps can be bulk-created
                                    text_log_steps.append(TextLogStep(
                                        name=step['name'],
                                        result=result_map[step['result']],
                                        job_id=job_id_mapping[ds_job_id],
                                        started_line_number=step['started_linenumber'],
                                        finished_line_number=step['finished_linenumber'],
                                        started=step.get('started'),
                                        finished=step.get('finished')))
                    TextLogStep.objects.bulk_create(text_log_steps)
                    TextLogError.objects.bulk_create(text_log_errors)

            self.stdout.write('{} ({})'.format(offset, time.time() - start),
                              ending='')
            self.stdout.flush()
            offset = max([job_id_pair[0] for job_id_pair in job_id_pairs])

        # finish the progress line for this datasource
        self.stdout.write('\n')