Beispiel #1
0
Datei: sm.py Projekt: mgius/pyz3r
    async def _init(self):
        """Populate ``site``, ``data``, ``guid`` and ``slug_id`` for this instance.

        One source is used, checked in this order: ``settings`` (generate a
        new game), ``slug_id`` (fetch an existing seed), then ``guid_id``
        (fetch an existing seed). When none of them is set, ``data``,
        ``slug_id`` and ``guid`` are reset to ``None``.
        """
        self.site = http.site(
            site_baseurl=self.baseurl,
            username=self.username,
            password=self.password,
        )

        if self.settings:
            # Generate a new game; the response carries its guid as a hex string.
            generate_path = f'/api/randomizers/{self.randomizer}/generate'
            self.data = await self.site.generate_game(generate_path, self.settings)
            self.guid = uuid.UUID(hex=self.data['guid'])
            self.slug_id = slugid.encode(self.guid)
            return

        if self.slug_id:
            self.guid = slugid.decode(self.slug_id)
        elif self.guid_id:
            self.guid = uuid.UUID(hex=self.guid_id)
            self.slug_id = slugid.encode(self.guid)
        else:
            # Nothing to generate or look up.
            self.data = None
            self.slug_id = None
            self.guid = None
            return

        # Both lookup paths fetch the stored seed by its hex guid.
        self.data = await http.request_generic(
            url=f'{self.baseurl}/api/seed/{self.guid.hex}',
            method='get',
            returntype='json')
Beispiel #2
0
 async def _init(self):
     """Populate ``data``, ``guid`` and ``slug_id`` for this instance.

     Sources are checked in this order: ``settings`` (generate a new game),
     ``slug_id`` (retrieve an existing game), then ``guid_id`` (retrieve an
     existing game). When none is set, all three attributes become ``None``.
     """
     if self.settings:
         # generate_game() takes no arguments here; the endpoint is stored on
         # the instance before the call.
         self.endpoint = f'/api/randomizers/{self.randomizer}/generate'
         self.data = await self.generate_game()
         self.guid = uuid.UUID(hex=self.data['guid'])
         self.slug_id = slugid.encode(self.guid)
         return

     if self.slug_id:
         self.guid = slugid.decode(self.slug_id)
     elif self.guid_id:
         self.guid = uuid.UUID(hex=self.guid_id)
         self.slug_id = slugid.encode(self.guid)
     else:
         # Nothing to generate or look up.
         self.data = None
         self.slug_id = None
         self.guid = None
         return

     # Both lookup paths end by retrieving the existing game.
     self.data = await self.retrieve_game()
Beispiel #3
0
def testSlugDecodeEncode():
    """Check that decoding and re-encoding leaves 10000 fresh v4 slugs unchanged."""
    for _ in range(10000):
        original = slugid.v4()
        round_tripped = slugid.encode(slugid.decode(original))

        assert original == round_tripped, "Decode and encode isn't identity"
Beispiel #4
0
def testSlugDecodeEncode():
    """Round-trip 10000 v4 slugs through decode then encode; each must be unchanged."""
    iterations = 10000
    for _ in range(iterations):
        slug_before = slugid.v4()
        decoded = slugid.decode(slug_before)
        slug_after = slugid.encode(decoded)

        assert slug_before == slug_after, "Decode and encode isn't identity"
Beispiel #5
0
def testUuidEncodeDecode():
    """Check that encoding and then decoding leaves 10000 random v4 uuids unchanged."""
    for _ in range(10000):
        original = uuid.uuid4()
        round_tripped = slugid.decode(slugid.encode(original))

        assert original == round_tripped, (
            "Encode and decode isn't identity: '"
            + str(original)
            + "' != '"
            + str(round_tripped)
            + "'"
        )
Beispiel #6
0
def testUuidEncodeDecode():
    """Round-trip 10000 v4 uuids through encode then decode; each must be unchanged."""
    iterations = 10000
    for _ in range(iterations):
        uuid_before = uuid.uuid4()
        encoded = slugid.encode(uuid_before)
        uuid_after = slugid.decode(encoded)

        assert uuid_before == uuid_after, (
            "Encode and decode isn't identity: '" + str(uuid_before)
            + "' != '" + str(uuid_after) + "'"
        )
Beispiel #7
0
def test_new_job_transformation(new_pulse_jobs, new_transformed_jobs, failure_classifications):
    """Each pulse message that JobLoader processes must match its expected transformed job.

    Expected jobs in ``new_transformed_jobs`` are keyed by the slug form of
    the uuid that precedes the "/" in the message's "taskId".
    """
    loader = JobLoader()
    root_url = 'https://firefox-ci-tc.services.mozilla.com'
    for message in new_pulse_jobs:
        # The message's "taskId" is "<uuid>/<suffix>", not the task id itself.
        uuid_part, _ = message["taskId"].split("/")
        # As of slugid v2, slugid.encode() returns a str (not bytes) on Python 3.
        expected_key = slugid.encode(uuid.UUID(uuid_part))
        transformed_job = loader.process_job(message, root_url)
        # Not every Taskcluster message yields a job; skip the ones that don't.
        if not transformed_job:
            continue
        assert new_transformed_jobs[expected_key] == transformed_job
Beispiel #8
0
def testEncode():
    """Encode a "non-nice" uuid (first bit set) and compare with its known slug.

    This uuid was picked because its slug contains both `-` and `_`.
    """

    # Bit pattern, hex digits and slug characters of the uuid, aligned:
    # 10000000010011110011111111001000110111111100101101001011000001101000100111111011101011101111101011010101111000011000011101010100....
    # <8 ><0 ><4 ><f ><3 ><f ><c ><8 ><d ><f ><c ><b ><4 ><b ><0 ><6 ><8 ><9 ><f ><b ><a ><e ><f ><a ><d ><5 ><e ><1 ><8 ><7 ><5 ><4 >
    # < g  >< E  >< 8  >< _  >< y  >< N  >< _  >< L  >< S  >< w  >< a  >< J  >< -  >< 6  >< 7  >< 6  >< 1  >< e  >< G  >< H  >< V  >< A  >
    source_uuid = uuid.UUID('{804f3fc8-dfcb-4b06-89fb-aefad5e18754}')
    expected = 'gE8_yN_LSwaJ-6761eGHVA'
    actual = slugid.encode(source_uuid)

    assert expected == actual, (
        "UUID not correctly encoded into slug: '" + expected + "' != '" + actual + "'"
    )
Beispiel #9
0
def uuid_to_slug(uuid_in):
    """ Returns a text slug representation of a UUID.

    :param uuid_in: uuid to represent as slug; either a ``uuid.UUID``
        instance or a value the ``uuid.UUID`` constructor accepts
    :return: slug
    :rtype: str
    :raises exceptions.ValidationError: if ``uuid_in`` cannot be converted
        to a ``uuid.UUID``
    """
    if not isinstance(uuid_in, uuid.UUID):
        try:
            uuid_in = uuid.UUID(uuid_in)
        # TypeError covers ``None``; AttributeError covers non-string values
        # such as ints; ValueError covers malformed strings.
        except (AttributeError, TypeError, ValueError):
            raise exceptions.ValidationError('invalid uuid value')
    slug = slugid.encode(uuid_in)
    # slugid.encode() returns bytes in older releases and str as of slugid v2
    # on Python 3, so decode only when bytes came back.
    if isinstance(slug, bytes):
        slug = slug.decode('utf-8')
    return slug
Beispiel #10
0
def testEncode():
    """Verify the slug produced for one fixed "non-nice" uuid (first bit set).

    The uuid is useful as a fixture because its slug includes both the `-`
    and the `_` character.
    """

    # Rows below: raw bits, hex digits, resulting slug characters.
    # 10000000010011110011111111001000110111111100101101001011000001101000100111111011101011101111101011010101111000011000011101010100....
    # <8 ><0 ><4 ><f ><3 ><f ><c ><8 ><d ><f ><c ><b ><4 ><b ><0 ><6 ><8 ><9 ><f ><b ><a ><e ><f ><a ><d ><5 ><e ><1 ><8 ><7 ><5 ><4 >
    # < g  >< E  >< 8  >< _  >< y  >< N  >< _  >< L  >< S  >< w  >< a  >< J  >< -  >< 6  >< 7  >< 6  >< 1  >< e  >< G  >< H  >< V  >< A  >
    known_slug = 'gE8_yN_LSwaJ-6761eGHVA'
    fixture_uuid = uuid.UUID('{804f3fc8-dfcb-4b06-89fb-aefad5e18754}')
    produced_slug = slugid.encode(fixture_uuid)

    assert known_slug == produced_slug, (
        "UUID not correctly encoded into slug: '"
        + known_slug
        + "' != '"
        + produced_slug
        + "'"
    )
Beispiel #11
0
def normalize_task_id(task_id):
    """Return the canonical (slug) form of a task id received from pulse.

    Anything from the first "/" onwards is dropped. If the remainder parses
    as a UUID it is encoded as a slug; otherwise it is returned unchanged.
    """
    # Pulse delivers an alternate encoding of the task id that does not work
    # anywhere else, so it is converted to the canonical form here.
    candidate, _, _ = task_id.partition("/")
    try:
        parsed = uuid.UUID(candidate)
    except ValueError:
        # Not a UUID, so this is probably already in the canonical form.
        return candidate
    else:
        return slugid.encode(parsed)
Beispiel #12
0
def print_url_to_taskcluster(job_guid):
    job_guid = job["job_guid"]
    (decoded_task_id, _) = job_guid.split("/")
    # As of slugid v2, slugid.encode() returns a string not bytestring under Python 3.
    taskId = slugid.encode(uuid.UUID(decoded_task_id))
    logger.info("https://firefox-ci-tc.services.mozilla.com/tasks/%s", taskId)
Beispiel #13
0
    def transform(self, pulse_job):
        """
        Transform a pulse job into a job that can be written to disk.  Log
        References and artifacts will also be transformed and loaded with the
        job.

        We can rely on the structure of ``pulse_job`` because it will
        already have been validated against the JSON Schema at this point.

        :param pulse_job: mapping holding the pulse job message
        :return: dict with the keys ``"job"``, ``"coalesced"`` and
            ``"revision"``
        """
        job_guid = pulse_job["taskId"]

        x = {
            "job": {
                "job_guid": job_guid,
                "name": pulse_job["display"].get("jobName", "unknown"),
                "job_symbol": self._get_job_symbol(pulse_job),
                "group_name": pulse_job["display"].get("groupName", "unknown"),
                "group_symbol": pulse_job["display"].get("groupSymbol"),
                "product_name": pulse_job.get("productName", "unknown"),
                "state": pulse_job["state"],
                "result": self._get_result(pulse_job),
                "reason": pulse_job.get("reason", "unknown"),
                "who": pulse_job.get("owner", "unknown"),
                "build_system_type": pulse_job["buildSystem"],
                "tier": pulse_job.get("tier", 1),
                "machine": self._get_machine(pulse_job),
                "option_collection": self._get_option_collection(pulse_job),
                "log_references": self._get_log_references(pulse_job),
                "artifacts": self._get_artifacts(pulse_job, job_guid),
            },
            "coalesced": pulse_job.get("coalesced", []),
            "revision": pulse_job["origin"]["revision"]
        }

        # some or all the time fields may not be present in some cases
        for k, v in self.TIME_FIELD_MAP.items():
            if v in pulse_job:
                x["job"][k] = to_timestamp(pulse_job[v])

        # if only one platform is given, use it.
        default_platform = pulse_job.get(
            "buildMachine",
            pulse_job.get("runMachine", {}))

        for k, v in self.PLATFORM_FIELD_MAP.items():
            platform_src = pulse_job[v] if v in pulse_job else default_platform
            x["job"][k] = self._get_platform(platform_src)

        # add some taskcluster metadata if it's available
        # currently taskcluster doesn't pass the taskId directly, so we'll
        # to derive it from the guid, where it is stored in uncompressed
        # guid form of a slug (see: https://github.com/taskcluster/slugid)
        # FIXME: add support for processing the taskcluster information
        # properly, when it's available:
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1323110#c7
        try:
            (decoded_task_id, retry_id) = job_guid.split('/')
            real_task_id = slugid.encode(uuid.UUID(decoded_task_id))
            x["job"].update({
                "taskcluster_task_id": real_task_id,
                "taskcluster_retry_id": int(retry_id)
            })
        # Best effort: a guid that is not "<uuid>/<int>" just means the
        # taskcluster fields are left out. Catch Exception, not a bare
        # except, so KeyboardInterrupt and SystemExit still propagate.
        except Exception:
            pass

        return x
Beispiel #14
0
# Process every WARC file named on the command line, record by record.
for filepath in args.files:
    print("* Dealing:", filepath, file=sys.stderr)
    with warc.open(filepath, 'rb') as f:
        for record in tqdm(f, unit='records'):
            URI = record.header.get('warc-target-uri')
            content = record.payload.read()
            if URI is not None and content is not None:
                # NOTE(review): if the payload is bytes, str(content) yields
                # its repr ("b'...'") rather than decoded text -- confirm
                # this is intended.
                words = latin_sep_words.split(str(content))

                # (word, occurrence count) pairs for this record
                words = list(Counter(words).items())

                if args.uuid:
                    # Drop the first and last character of the record id
                    # (presumably the surrounding "<" and ">") before parsing.
                    uuid = record.header.get('WARC-Record-ID')[1:-1]
                    uuid = UUID(uuid)
                    if args.compressuuid:
                        uuid = slugid.encode(uuid)
                    # At this point ``uuid`` is a UUID (no compression), or
                    # bytes/str from slugid.encode depending on the slugid
                    # version. Only bytes has .decode(), so build the text
                    # form explicitly.
                    if isinstance(uuid, bytes):
                        uuid_text = uuid.decode('ascii')
                    else:
                        uuid_text = str(uuid)
                    for (word, count) in words:
                        print("{word}\t{uuid} {count}".format(
                            word=word, uuid=uuid_text, count=count))
                else:
                    docID = docIdGenerator.next()
                    if args.urlTable:
                        print("{docID}\t{url}".format(docID=docID, url=URI),
                              file=fileURLTable)
                        # fileURLTable.write("{docID}\t{url}\t{lan}".format(docID=docID, url=URI, lan=lang))
                    if args.binary:
                        docID = docID.to_bytes(docIDDigits,
                                               'little',
                                               signed=True)
Beispiel #15
0
def print_url_to_taskcluster(job_guid):
    job_guid = job["job_guid"]
    (decoded_task_id, _) = job_guid.split("/")
    # As of slugid v2, slugid.encode() returns a string not bytestring under Python 3.
    taskId = slugid.encode(uuid.UUID(decoded_task_id))
    logger.info("https://taskcluster-ui.herokuapp.com/tasks/%s", taskId)
Beispiel #16
0
    def transform(self, pulse_job):
        """
        Build the storable job structure for a pulse job, including its log
        references and artifacts.

        ``pulse_job`` has already been validated against the JSON Schema, so
        its structure is relied upon here.

        Returns a dict with the keys ``"job"``, ``"superseded"`` and
        ``"revision"``.
        """
        job_guid = pulse_job["taskId"]
        display = pulse_job["display"]

        job = {
            "job_guid": job_guid,
            "name": display.get("jobName", "unknown"),
            "job_symbol": self._get_job_symbol(pulse_job),
            "group_name": display.get("groupName", "unknown"),
            "group_symbol": display.get("groupSymbol"),
            "product_name": pulse_job.get("productName", "unknown"),
            "state": pulse_job["state"],
            "result": self._get_result(pulse_job),
            "reason": pulse_job.get("reason", "unknown"),
            "who": pulse_job.get("owner", "unknown"),
            "build_system_type": pulse_job["buildSystem"],
            "tier": pulse_job.get("tier", 1),
            "machine": self._get_machine(pulse_job),
            "option_collection": self._get_option_collection(pulse_job),
            "log_references": self._get_log_references(pulse_job),
            "artifacts": self._get_artifacts(pulse_job, job_guid),
        }
        transformed = {
            "job": job,
            # The message's "coalesced" list is stored under "superseded".
            "superseded": pulse_job.get("coalesced", []),
            "revision": pulse_job["origin"]["revision"],
        }

        # Time fields are optional; copy over only the ones that are present.
        for job_key, pulse_key in self.TIME_FIELD_MAP.items():
            if pulse_key in pulse_job:
                job[job_key] = to_timestamp(pulse_job[pulse_key])

        # A platform field missing from the message falls back to
        # "buildMachine", then "runMachine", then an empty dict.
        run_machine = pulse_job.get("runMachine", {})
        default_platform = pulse_job.get("buildMachine", run_machine)

        for job_key, pulse_key in self.PLATFORM_FIELD_MAP.items():
            if pulse_key in pulse_job:
                platform_src = pulse_job[pulse_key]
            else:
                platform_src = default_platform
            job[job_key] = self._get_platform(platform_src)

        # Taskcluster does not pass the taskId directly yet, so it is derived
        # from the guid, which holds it as the uncompressed uuid form of a
        # slug (see: https://github.com/taskcluster/slugid).
        # FIXME: add support for processing the taskcluster information
        # properly, when it's available:
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1323110#c7
        try:
            uuid_part, retry_part = job_guid.split('/')
            job.update(
                taskcluster_task_id=slugid.encode(uuid.UUID(uuid_part)),
                taskcluster_retry_id=int(retry_part),
            )
        # TODO: Figure out what exception types we actually expect here.
        except Exception:
            pass

        return transformed
Beispiel #17
0
def task_and_retry_ids(job_guid):
    (decoded_task_id, retry_id) = job_guid.split('/')
    # As of slugid v2, slugid.encode() returns a string not bytestring under Python 3.
    real_task_id = slugid.encode(uuid.UUID(decoded_task_id))
    return (real_task_id, retry_id)