async def _init(self):
    """Create the site client and resolve ``data``, ``guid`` and ``slug_id``.

    Exactly one source is used, checked in this order:

    1. ``self.settings`` -- generate a new game through the site client and
       derive ``guid``/``slug_id`` from the ``guid`` field of the response.
    2. ``self.slug_id`` -- decode the slug into a UUID and fetch the seed.
    3. ``self.guid_id`` -- parse the hex string into a UUID, derive the slug
       and fetch the seed.

    When none of them is set, ``data``, ``slug_id`` and ``guid`` are all
    set to ``None``.
    """
    self.site = http.site(
        site_baseurl=self.baseurl,
        username=self.username,
        password=self.password,
    )

    if self.settings:
        # Brand new game: the generate endpoint returns the guid for us.
        endpoint = f'/api/randomizers/{self.randomizer}/generate'
        self.data = await self.site.generate_game(endpoint, self.settings)
        self.guid = uuid.UUID(hex=self.data['guid'])
        self.slug_id = slugid.encode(self.guid)
        return

    # Existing game: work out the guid first, then fetch the seed once.
    if self.slug_id:
        self.guid = slugid.decode(self.slug_id)
    elif self.guid_id:
        self.guid = uuid.UUID(hex=self.guid_id)
        self.slug_id = slugid.encode(self.guid)
    else:
        # Nothing to generate or look up.
        self.data = None
        self.slug_id = None
        self.guid = None
        return

    self.data = await http.request_generic(
        url=f'{self.baseurl}/api/seed/{self.guid.hex}',
        method='get',
        returntype='json')
async def _init(self):
    """Resolve ``data``, ``guid`` and ``slug_id`` from whichever input is set.

    Sources are checked in this order:

    1. ``self.settings`` -- store the generate endpoint on ``self.endpoint``,
       call ``self.generate_game()`` and derive ``guid``/``slug_id`` from the
       ``guid`` field of the response.
    2. ``self.slug_id`` -- decode the slug into a UUID, then
       ``self.retrieve_game()``.
    3. ``self.guid_id`` -- parse the hex string into a UUID, derive the slug,
       then ``self.retrieve_game()``.

    When none of them is set, ``data``, ``slug_id`` and ``guid`` are all
    set to ``None``.
    """
    if self.settings:
        # Brand new game: the generate call returns the guid for us.
        self.endpoint = f'/api/randomizers/{self.randomizer}/generate'
        self.data = await self.generate_game()
        self.guid = uuid.UUID(hex=self.data['guid'])
        self.slug_id = slugid.encode(self.guid)
        return

    # Existing game: work out the guid first, then retrieve it once.
    if self.slug_id:
        self.guid = slugid.decode(self.slug_id)
    elif self.guid_id:
        self.guid = uuid.UUID(hex=self.guid_id)
        self.slug_id = slugid.encode(self.guid)
    else:
        # Nothing to generate or look up.
        self.data = None
        self.slug_id = None
        self.guid = None
        return

    self.data = await self.retrieve_game()
def testSlugDecodeEncode():
    """Round-trip 10000 freshly generated v4 slugs through decode, then encode.

    Every slug must come back unchanged.
    """
    for _ in range(10000):
        original = slugid.v4()
        round_tripped = slugid.encode(slugid.decode(original))
        assert original == round_tripped, "Decode and encode isn't identity"
def testUuidEncodeDecode():
    """Round-trip 10000 random v4 uuids through encode, then decode.

    Every uuid must come back unchanged.
    """
    for _ in range(10000):
        original = uuid.uuid4()
        restored = slugid.decode(slugid.encode(original))
        assert original == restored, (
            "Encode and decode isn't identity: '"
            + str(original) + "' != '" + str(restored) + "'"
        )
def testUuidEncodeDecode():
    """Check that encoding a v4 uuid to a slug and decoding it is lossless.

    Repeated for 10000 randomly generated uuids.
    """
    for _ in range(10000):
        source_uuid = uuid.uuid4()
        as_slug = slugid.encode(source_uuid)
        decoded_uuid = slugid.decode(as_slug)
        assert source_uuid == decoded_uuid, (
            "Encode and decode isn't identity: '"
            + str(source_uuid)
            + "' != '"
            + str(decoded_uuid)
            + "'"
        )
def test_new_job_transformation(new_pulse_jobs, new_transformed_jobs, failure_classifications):
    """Check that every processed pulse message matches its expected transformed job.

    ``failure_classifications`` is not used in the body; presumably it is
    requested only for its fixture side effects -- confirm against the fixture.
    """
    loader = JobLoader()
    root_url = 'https://firefox-ci-tc.services.mozilla.com'

    for pulse_message in new_pulse_jobs:
        # The message's "taskId" is not really the task id: the part before
        # the "/" is the uuid form, which has to be slug-encoded first.
        raw_task_id, _ = pulse_message["taskId"].split("/")
        # As of slugid v2, slugid.encode() returns a string not bytestring under Python 3.
        expected_key = slugid.encode(uuid.UUID(raw_task_id))

        result = loader.process_job(pulse_message, root_url)
        # Not all messages from Taskcluster will be processed
        if not result:
            continue
        assert new_transformed_jobs[expected_key] == result
def testEncode():
    """Encode a known "non-nice" uuid (first bit set) and compare with its known slug.

    This particular uuid was picked because its slug contains both the `-`
    and the `_` character.
    """
    # 10000000010011110011111111001000110111111100101101001011000001101000100111111011101011101111101011010101111000011000011101010100....
    # <8 ><0 ><4 ><f ><3 ><f ><c ><8 ><d ><f ><c ><b ><4 ><b ><0 ><6 ><8 ><9 ><f ><b ><a ><e ><f ><a ><d ><5 ><e ><1 ><8 ><7 ><5 ><4 >
    # < g >< E >< 8 >< _ >< y >< N >< _ >< L >< S >< w >< a >< J >< - >< 6 >< 7 >< 6 >< 1 >< e >< G >< H >< V >< A >
    known_uuid = uuid.UUID('{804f3fc8-dfcb-4b06-89fb-aefad5e18754}')
    wanted = 'gE8_yN_LSwaJ-6761eGHVA'
    produced = slugid.encode(known_uuid)
    assert wanted == produced, (
        "UUID not correctly encoded into slug: '" + wanted + "' != '" + produced + "'"
    )
def uuid_to_slug(uuid_in):
    """
    Returns a utf-8 slug representation of a UUID.

    :param uuid.UUID uuid_in: uuid to represent as slug; any other value is
        first passed to ``uuid.UUID()`` (e.g. a hex string)
    :return: utf-8 slug
    :rtype: str
    :raises exceptions.ValidationError: if ``uuid_in`` is not a UUID and
        cannot be converted into one
    """
    if not isinstance(uuid_in, uuid.UUID):
        try:
            uuid_in = uuid.UUID(uuid_in)
        except (AttributeError, TypeError, ValueError):
            # uuid.UUID() raises ValueError for malformed strings,
            # AttributeError for non-string objects and TypeError for None;
            # report all of them uniformly as a validation failure.
            raise exceptions.ValidationError('invalid uuid value')

    slug = slugid.encode(uuid_in)
    # slugid v1 returns bytes, slugid v2 returns str under Python 3; only
    # decode when there is actually something to decode.
    if isinstance(slug, bytes):
        slug = slug.decode('utf-8')
    return slug
def normalize_task_id(task_id):
    """Return the canonical (slug) form of a task id.

    Everything from the first "/" onwards is dropped. If the remainder
    parses as a UUID it is slug-encoded; otherwise it is returned unchanged.
    """
    # For some reason, pulse doesn't get the real task ID, but some
    # alternate encoding of it that doesn't work anywhere else. So we have
    # to first convert to the canonical form.
    head = task_id.partition("/")[0]
    try:
        parsed = uuid.UUID(head)
    except ValueError:
        # This is probably already in the canonical form
        return head
    return slugid.encode(parsed)
def print_url_to_taskcluster(job_guid):
    """Log the Taskcluster task URL that corresponds to ``job_guid``.

    :param str job_guid: job guid of the form ``"<uuid>/<suffix>"``; the part
        before the ``/`` is parsed as a UUID and slug-encoded into the task id
    :raises ValueError: if ``job_guid`` does not contain exactly one ``/`` or
        its first part is not a valid UUID
    """
    # Use the argument as given: the previous version overwrote it with
    # job["job_guid"], reading a name that is not defined in this function.
    (decoded_task_id, _) = job_guid.split("/")
    # As of slugid v2, slugid.encode() returns a string not bytestring under Python 3.
    task_id = slugid.encode(uuid.UUID(decoded_task_id))
    logger.info("https://firefox-ci-tc.services.mozilla.com/tasks/%s", task_id)
def transform(self, pulse_job):
    """
    Transform a pulse job into a job that can be written to disk. Log
    References and artifacts will also be transformed and loaded with the
    job.

    We can rely on the structure of ``pulse_job`` because it will
    already have been validated against the JSON Schema at this point.

    :param dict pulse_job: the validated pulse job message
    :return: dict with the keys ``job``, ``coalesced`` and ``revision``
    :rtype: dict
    """
    job_guid = pulse_job["taskId"]
    x = {
        "job": {
            "job_guid": job_guid,
            "name": pulse_job["display"].get("jobName", "unknown"),
            "job_symbol": self._get_job_symbol(pulse_job),
            "group_name": pulse_job["display"].get("groupName", "unknown"),
            "group_symbol": pulse_job["display"].get("groupSymbol"),
            "product_name": pulse_job.get("productName", "unknown"),
            "state": pulse_job["state"],
            "result": self._get_result(pulse_job),
            "reason": pulse_job.get("reason", "unknown"),
            "who": pulse_job.get("owner", "unknown"),
            "build_system_type": pulse_job["buildSystem"],
            "tier": pulse_job.get("tier", 1),
            "machine": self._get_machine(pulse_job),
            "option_collection": self._get_option_collection(pulse_job),
            "log_references": self._get_log_references(pulse_job),
            "artifacts": self._get_artifacts(pulse_job, job_guid),
        },
        "coalesced": pulse_job.get("coalesced", []),
        "revision": pulse_job["origin"]["revision"]
    }

    # some or all the time fields may not be present in some cases
    for k, v in self.TIME_FIELD_MAP.items():
        if v in pulse_job:
            x["job"][k] = to_timestamp(pulse_job[v])

    # if only one platform is given, use it.
    default_platform = pulse_job.get(
        "buildMachine",
        pulse_job.get("runMachine", {}))

    for k, v in self.PLATFORM_FIELD_MAP.items():
        platform_src = pulse_job[v] if v in pulse_job else default_platform
        x["job"][k] = self._get_platform(platform_src)

    # add some taskcluster metadata if it's available
    # currently taskcluster doesn't pass the taskId directly, so we have
    # to derive it from the guid, where it is stored in uncompressed
    # guid form of a slug (see: https://github.com/taskcluster/slugid)
    # FIXME: add support for processing the taskcluster information
    # properly, when it's available:
    # https://bugzilla.mozilla.org/show_bug.cgi?id=1323110#c7
    try:
        (decoded_task_id, retry_id) = job_guid.split('/')
        real_task_id = slugid.encode(uuid.UUID(decoded_task_id))
        x["job"].update({
            "taskcluster_task_id": real_task_id,
            "taskcluster_retry_id": int(retry_id)
        })
    except Exception:
        # Best effort only: a guid that does not follow the
        # "<uuid>/<retry>" layout simply gets no taskcluster metadata.
        # Catching Exception (rather than using a bare except) keeps
        # KeyboardInterrupt and SystemExit propagating.
        pass

    return x
# For every input WARC file, count the words of each record and either print
# them keyed by the record's UUID or allocate a numeric document id.
for filepath in args.files:
    print("* Dealing:", filepath, file=sys.stderr)
    with warc.open(filepath, 'rb') as f:
        for record in tqdm(f, unit='records'):
            URI = record.header.get('warc-target-uri')
            content = record.payload.read()
            if URI is not None and content is not None:
                # NOTE(review): if payload.read() returns bytes, str() yields
                # the repr (b'...') rather than decoded text -- confirm.
                words = latin_sep_words.split(str(content))
                # (word, count) pairs for this record
                words = list(Counter(words).items())
                if args.uuid:
                    # Drop the first and last character of the header value
                    # (presumably the surrounding angle brackets).
                    uuid = record.header.get('WARC-Record-ID')[1:-1]
                    uuid = UUID(uuid)
                    if args.compressuuid:
                        uuid = slugid.encode(uuid)
                    # slugid.encode() returns bytes (slugid v1) or str
                    # (slugid v2), and without --compressuuid this is still a
                    # UUID object; only bytes has .decode(), so normalise to
                    # text explicitly instead of always calling it.
                    if isinstance(uuid, bytes):
                        uuid_text = uuid.decode('ascii')
                    else:
                        uuid_text = str(uuid)
                    for (word, count) in words:
                        print("{word}\t{uuid} {count}".format(
                            word=word,
                            uuid=uuid_text,
                            count=count))
                else:
                    docID = docIdGenerator.next()
                    if args.urlTable:
                        print("{docID}\t{url}".format(docID=docID, url=URI), file=fileURLTable)
                        # fileURLTable.write("{docID}\t{url}\t{lan}".format(docID=docID, url=URI, lan=lang))
                    if args.binary:
                        docID = docID.to_bytes(docIDDigits, 'little', signed=True)
def print_url_to_taskcluster(job_guid):
    """Log the Taskcluster UI URL of the task that ``job_guid`` refers to.

    :param str job_guid: job guid of the form ``"<uuid>/<suffix>"``; the part
        before the ``/`` is parsed as a UUID and slug-encoded into the task id
    :raises ValueError: if ``job_guid`` does not contain exactly one ``/`` or
        its first part is not a valid UUID
    """
    # Use the argument as given: the previous version overwrote it with
    # job["job_guid"], reading a name that is not defined in this function.
    (decoded_task_id, _) = job_guid.split("/")
    # As of slugid v2, slugid.encode() returns a string not bytestring under Python 3.
    task_id = slugid.encode(uuid.UUID(decoded_task_id))
    logger.info("https://taskcluster-ui.herokuapp.com/tasks/%s", task_id)
def transform(self, pulse_job):
    """
    Convert a pulse job into the job structure that gets written to disk,
    including its transformed log references and artifacts.

    The structure of ``pulse_job`` can be relied upon because it has
    already been validated against the JSON Schema at this point.

    Returns a dict with the keys ``job``, ``superseded`` and ``revision``.
    """
    job_guid = pulse_job["taskId"]

    job = {
        "job_guid": job_guid,
        "name": pulse_job["display"].get("jobName", "unknown"),
        "job_symbol": self._get_job_symbol(pulse_job),
        "group_name": pulse_job["display"].get("groupName", "unknown"),
        "group_symbol": pulse_job["display"].get("groupSymbol"),
        "product_name": pulse_job.get("productName", "unknown"),
        "state": pulse_job["state"],
        "result": self._get_result(pulse_job),
        "reason": pulse_job.get("reason", "unknown"),
        "who": pulse_job.get("owner", "unknown"),
        "build_system_type": pulse_job["buildSystem"],
        "tier": pulse_job.get("tier", 1),
        "machine": self._get_machine(pulse_job),
        "option_collection": self._get_option_collection(pulse_job),
        "log_references": self._get_log_references(pulse_job),
        "artifacts": self._get_artifacts(pulse_job, job_guid),
    }
    transformed = {
        "job": job,
        "superseded": pulse_job.get("coalesced", []),
        "revision": pulse_job["origin"]["revision"],
    }

    # Some or all of the time fields may be absent, so copy only those present.
    for target_key, source_key in self.TIME_FIELD_MAP.items():
        if source_key in pulse_job:
            job[target_key] = to_timestamp(pulse_job[source_key])

    # If only one platform is given, use it for every platform field.
    fallback_platform = pulse_job.get("buildMachine", pulse_job.get("runMachine", {}))
    for target_key, source_key in self.PLATFORM_FIELD_MAP.items():
        if source_key in pulse_job:
            platform_src = pulse_job[source_key]
        else:
            platform_src = fallback_platform
        job[target_key] = self._get_platform(platform_src)

    # Add some taskcluster metadata if it's available.
    # Currently taskcluster doesn't pass the taskId directly, so we have
    # to derive it from the guid, where it is stored in uncompressed
    # guid form of a slug (see: https://github.com/taskcluster/slugid)
    # FIXME: add support for processing the taskcluster information
    # properly, when it's available:
    # https://bugzilla.mozilla.org/show_bug.cgi?id=1323110#c7
    try:
        raw_task_id, retry_id = job_guid.split('/')
        slug = slugid.encode(uuid.UUID(raw_task_id))
        job.update({
            "taskcluster_task_id": slug,
            "taskcluster_retry_id": int(retry_id),
        })
    # TODO: Figure out what exception types we actually expect here.
    except Exception:
        pass

    return transformed
def task_and_retry_ids(job_guid):
    """Split a job guid into ``(task_id, retry_id)``.

    ``job_guid`` must have the form ``"<uuid>/<retry>"``. The uuid part is
    slug-encoded into the task id; the retry part is returned as it appears
    in the guid (a string).
    """
    raw_task_id, retry_id = job_guid.split('/')
    # As of slugid v2, slugid.encode() returns a string not bytestring under Python 3.
    task_slug = slugid.encode(uuid.UUID(raw_task_id))
    return (task_slug, retry_id)