def es_bulksetop(esq, frum, query):
    abs_limit = MIN([query.limit, MAX_DOCUMENTS])
    guid = Random.base64(32, extra="-_")

    schema = query.frum.schema
    query_path = schema.query_path[0]
    new_select, split_select = get_selects(query)
    split_wheres = split_expression_by_path(query.where, schema, lang=ES52)
    es_query = es_query_proto(query_path, split_select, split_wheres, schema)
    es_query.size = MIN([query.chunk_size, MAX_CHUNK_SIZE])
    es_query.sort = jx_sort_to_es_sort(query.sort, schema)
    if not es_query.sort:
        # Scrolling requires a sort; "_doc" is the cheapest possible order
        es_query.sort = ["_doc"]

    formatter = formatters[query.format](abs_limit, new_select, query)

    # Run the extraction in a background thread; the caller gets URLs immediately
    Thread.run(
        "Download " + guid,
        extractor,
        guid,
        abs_limit,
        esq,
        es_query,
        formatter,
        parent_thread=Null,
    ).release()

    output = wrap(
        {
            "url": URL_PREFIX / (guid + ".json"),
            "status": URL_PREFIX / (guid + ".status.json"),
            "meta": {"format": query.format, "es_query": es_query, "limit": abs_limit},
        }
    )
    return output
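# es_bulksetop returns before the extraction finishes, so a caller is expected
# to poll until the result file at "url" exists. A minimal polling sketch,
# assuming "response" is the plain dict returned above; wait_for_bulk_result
# and its retry policy are hypothetical, not part of this module.
import time

import requests


def wait_for_bulk_result(response, poll_seconds=10, timeout=600):
    # Hypothetical helper: fetch the result URL returned by es_bulksetop,
    # retrying while the background thread is still writing the file.
    deadline = time.time() + timeout
    while time.time() < deadline:
        result = requests.get(response["url"])
        if result.status_code == 200:
            return result.json()  # the finished bulk download
        time.sleep(poll_seconds)
    raise TimeoutError("bulk set-op did not finish before timeout")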
def complex_job(
    transactional_db, generic_reference_data, test_repository, extract_job_settings, now
):
    # Classification, repository, and push setup
    fc = FailureClassification.objects.create(id=1, name="not classified")
    repository_group = RepositoryGroup.objects.create(name="common")
    repo = Repository.objects.create(name="autoland", repository_group=repository_group)

    push = Push.objects.create(
        author="*****@*****.**",
        repository=repo,
        revision="ae6bb3a1066959a8c43d003a3caab0af769455bf",
        time=unix2datetime(1578427105).replace(tzinfo=None),
    )
    Commit.objects.create(
        push=push,
        revision="ae6bb3a1066959a8c43d003a3caab0af769455bf",
        author="*****@*****.**",
        comments="no comment",
    )
    Commit.objects.create(
        push=push,
        revision="0123456789012345678901234567890123456789",
        author="*****@*****.**",
        comments="no comment2",
    )

    debug = Option.objects.create(name="debug")
    oc = OptionCollection.objects.create(option_collection_hash=Random.base64(5), option=debug)

    # The job under test
    job = Job.objects.create(
        autoclassify_status=1,
        guid=Random.base64(20),
        repository=test_repository,
        push_id=push.id,
        signature=generic_reference_data.signature,
        build_platform=generic_reference_data.build_platform,
        machine_platform=generic_reference_data.machine_platform,
        machine=generic_reference_data.machine,
        option_collection_hash=oc.option_collection_hash,
        job_type=generic_reference_data.job_type,
        job_group=generic_reference_data.job_group,
        product=generic_reference_data.product,
        failure_classification_id=fc.id,
        who="*****@*****.**",
        reason="scheduled",
        result="success",
        state="completed",
        submit_time=unix2datetime(1578427253).replace(tzinfo=None),
        start_time=unix2datetime(1578430841).replace(tzinfo=None),
        last_modified=unix2datetime(1578432686.364459).replace(tzinfo=None),
        end_time=unix2datetime(1578432680).replace(tzinfo=None),
        tier=1,
    )

    # Text log step with two error lines attached to the job
    text_log_step = TextLogStep.objects.create(
        job=job,
        finished_line_number=88739,
        name="Unnamed step",
        result=7,
        started_line_number=0,
    )
    TextLogError.objects.create(
        step=text_log_step, line="line contents here", line_number=619845839
    )
    TextLogError.objects.create(
        step=text_log_step, line="ERROR! more line contents", line_number=6
    )

    TaskclusterMetadata.objects.create(job=job, retry_id=0, task_id="WWb9ExAvQUa78ku0DIxdSQ")

    # Two job logs: the raw live log and the wpt error summary
    JobLog.objects.create(
        job_id=job.id,
        name="builds-4h",
        status=1,
        url="https://example.com/api/queue/v1/task/WWb9ExAvQUa78ku0DIxdSQ/runs/0/artifacts/public/logs/live_backing.log",
    )
    job_logs1 = JobLog.objects.create(
        job_id=job.id,
        name="errorsummary_json",
        status=1,
        url="https://example.com/api/queue/v1/task/WWb9ExAvQUa78ku0DIxdSQ/runs/0/artifacts/public/test_info/wpt_errorsummary.log",
    )

    # A classified failure with two failure lines pointing at it
    bcf = ClassifiedFailure.objects.create(bug_number=1234567)
    bcf.created = Date("2020-01-17 12:00:00").datetime
    bcf.save()

    FailureLine.objects.create(
        job_log=job_logs1,
        action="test_groups",
        best_classification=bcf,
        best_is_verified=True,
        repository=repo,
        job_guid=job.guid,
        line=15,
        modified=0,
        stackwalk_stderr=1578432686,
        stackwalk_stdout=1578432686,
    )
    FailureLine.objects.create(
        job_log=job_logs1,
        action="crash",
        best_classification=bcf,
        best_is_verified=False,
        repository=repo,
        job_guid=job.guid,
        line=24031,
        modified=0,
        signature="@ mozilla::dom::CustomElementData::SetCustomElementDefinition(mozilla::dom::CustomElementDefinition*)",
        stackwalk_stderr=1578432686,
        stackwalk_stdout=1578432686,
        test="/custom-elements/upgrading.html",
    )
    return job
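# A minimal usage sketch, assuming complex_job is registered as a pytest
# fixture (it consumes other fixtures such as transactional_db). The test
# name and assertions are illustrative, not taken from the real suite, and
# the model import path is an assumption.
import pytest

from treeherder.model.models import FailureLine, JobLog  # assumed module path


@pytest.mark.django_db
def test_complex_job_shape(complex_job):
    # The fixture returns the Job row; related rows are reachable through
    # the same model managers the fixture used to create them.
    assert complex_job.state == "completed"
    assert complex_job.result == "success"
    assert JobLog.objects.filter(job_id=complex_job.id).count() == 2
    assert FailureLine.objects.filter(job_guid=complex_job.guid).count() == 2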
def es_bulkaggsop(esq, frum, query):
    query = query.copy()  # WE WILL MARK UP THIS QUERY

    chunk_size = min(coalesce(query.chunk_size, MAX_CHUNK_SIZE), MAX_CHUNK_SIZE)
    schema = frum.schema
    query_path = first(schema.query_path)
    selects = listwrap(query.select)
    variable = first(query.groupby).value

    # FIND CARDINALITY
    cardinality_check = Timer(
        "Get cardinality for {{column}}", param={"column": variable.var}
    )
    with cardinality_check:
        columns = schema.leaves(variable.var)
        if len(columns) != 1:
            Log.error(
                "too many columns to bulk groupby:\n{{columns|json}}", columns=columns
            )
        column = first(columns)

        if query.where is TRUE:
            # No filter: use (or refresh) the cached column cardinality
            cardinality = column.cardinality
            if cardinality == None:
                esq.namespace._update_cardinality(column)
                cardinality = column.cardinality
        else:
            # Filtered: ask Elasticsearch for the cardinality under the filter
            cardinality = esq.query(
                {
                    "select": {
                        "name": "card",
                        "value": variable,
                        "aggregate": "cardinality",
                    },
                    "from": frum.name,
                    "where": query.where,
                    "format": "cube",
                }
            ).card

        # Ceiling division: one partition per chunk_size of distinct values
        num_partitions = (cardinality + chunk_size - 1) // chunk_size
        if num_partitions > MAX_PARTITIONS:
            Log.error("Requesting more than {{num}} partitions", num=num_partitions)

    acc, decoders, es_query = build_es_query(selects, query_path, schema, query)
    guid = Random.base64(32, extra="-_")
    abs_limit = mo_math.MIN((query.limit, first(query.groupby).domain.limit))
    formatter = formatters[query.format](abs_limit)

    # Run the extraction in a background thread; the caller gets URLs immediately
    Thread.run(
        "extract to " + guid + ".json",
        extractor,
        guid,
        num_partitions,
        esq,
        query,
        selects,
        query_path,
        schema,
        chunk_size,
        cardinality,
        abs_limit,
        formatter,
        parent_thread=Null,
    )

    output = wrap(
        {
            "url": URL_PREFIX / (guid + ".json"),
            "status": URL_PREFIX / (guid + ".status.json"),
            "meta": {
                "format": query.format,
                "timing": {"cardinality_check": cardinality_check.duration},
                "es_query": es_query,
                "num_partitions": num_partitions,
                "cardinality": cardinality,
            },
        }
    )
    return output
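# The partition count above is a ceiling division, so no groupby values are
# dropped when cardinality is not a multiple of chunk_size. A standalone
# check with illustrative numbers (the helper name is hypothetical):
import math


def partitions(cardinality, chunk_size):
    # Same arithmetic as es_bulkaggsop: round up instead of truncating
    return (cardinality + chunk_size - 1) // chunk_size


assert partitions(25_000, 10_000) == math.ceil(25_000 / 10_000) == 3
assert partitions(20_000, 10_000) == 2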
def generate_sid():
    """
    GENERATE A UNIQUE SESSION ID
    """
    return Random.base64(40)
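# Random.base64(40) draws 40 characters from a base64 alphabet. A standard
# library stand-in for illustration only; the vendored Random class's exact
# alphabet is an assumption.
import secrets
import string

BASE64_ALPHABET = string.ascii_letters + string.digits + "+/"


def generate_sid_stdlib():
    # Hypothetical equivalent: 40 characters drawn with a CSPRNG
    return "".join(secrets.choice(BASE64_ALPHABET) for _ in range(40))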