Example #1
0
async def main(subnet_tag: str):
    """Run a single long-running task on a Golem provider.

    Requests a VM image, runs a shell entrypoint on the provider and
    downloads the resulting ``output.txt`` locally.

    :param subnet_tag: Golem subnet to negotiate providers on.
    """
    # Payload definition: the VM image (by hash) and minimum provider resources.
    package = await vm.repo(
        image_hash="7c63ebd87868e27eb99a687d1175e77450d7b3ff73fc86e6bdcef37b",
        min_mem_gib=4.0,
        min_storage_gib=8.0,
    )

    async def worker(ctx: WorkContext, tasks):
        """Per-provider worker: run the entrypoint script and fetch its output."""
        async for task in tasks:
            ctx.run("/golem/entrypoints/runold.sh")
            output_file = "output.txt"
            ctx.download_file("/golem/output/output.txt", output_file)
            try:
                # Timeout for executing the whole batch on the provider.
                # NOTE(review): 18000 s is 5 hours — far beyond the "two
                # minutes per frame" this comment originally claimed;
                # presumably raised for a long-running script. Confirm against
                # the actual workload. If the timeout is exceeded, this worker
                # instance is shut down and all remaining tasks, including the
                # current one, are computed by other providers.
                yield ctx.commit(timeout=timedelta(seconds=18000))
                # TODO: Check if job results are valid
                # and reject by: task.reject_task(reason = 'invalid file')
                task.accept_result(result=output_file)
            except BatchTimeoutError:
                print(f"{utils.TEXT_COLOR_RED}"
                      f"Task timed out: {task}, time: {task.running_time}"
                      f"{utils.TEXT_COLOR_DEFAULT}")
                raise

    # A single pseudo-"frame": the range only drives how many tasks are created.
    frames: range = range(0, 1)
    # Worst-case overhead, in minutes, for initialization (negotiation, file transfer etc.)
    # TODO: make this dynamic, e.g. depending on the size of files to transfer
    init_overhead = 3
    # Clamp bounds, in minutes, for the overall Executor timeout.
    # NOTE(review): these values (30, 150) do not match the commonly quoted
    # [5 min, 30 min] provider acceptance window — confirm that providers on
    # the target subnet accept timeouts this large.
    min_timeout, max_timeout = 30, 150

    # Total timeout: 2 minutes per frame plus init overhead, clamped to the bounds.
    timeout = timedelta(
        minutes=max(min(init_overhead +
                        len(frames) * 2, max_timeout), min_timeout))

    # By passing `event_consumer=log_summary()` we enable summary logging.
    # See the documentation of the `yapapi.log` module on how to set
    # the level of detail and format of the logged information.
    async with Executor(
        package=package,
        max_workers=3,
        budget=10.0,
        timeout=timeout,
        subnet_tag=subnet_tag,
        event_consumer=log_summary(log_event_repr),
    ) as executor:

        async for task in executor.submit(
                worker, [Task(data=frame) for frame in frames]):
            print(
                f"{utils.TEXT_COLOR_CYAN}"
                f"Task computed: {task}, result: {task.result}, time: {task.running_time}"
                f"{utils.TEXT_COLOR_DEFAULT}")
async def main(subnet_tag: str, app: App, batches: List[Batch]):
    """Execute each batch on a Golem provider and download its output file.

    :param subnet_tag: Golem subnet to negotiate providers on.
    :param app: application object; ``app.args.time_out`` supplies the
        overall Executor timeout, in minutes.
    :param batches: batches to execute — one Task (and up to one worker)
        per batch.
    """
    package = await vm.repo(
        # using existing image for 'blender' example
        image_hash="9a3b5d67b0b27746283cb5f287c13eab1beaa12d92a9f536b747c7ae",
        min_mem_gib=1.0,
        min_storage_gib=2.0,
    )

    async def worker(ctx: WorkContext, tasks):
        """Per-provider worker: upload the batch's files, run them, fetch output."""
        cwd = pathlib.Path.cwd()
        async for task in tasks:
            batch = task.data
            # Best-effort removal of a stale local output from a previous run.
            # Narrowed from a bare `except:` so that only filesystem errors
            # (e.g. the file not existing) are swallowed — a bug in
            # batch.local_out() would otherwise be silently hidden.
            try:
                os.remove(batch.local_out(cwd))
            except OSError:
                pass

            # Ship every executable plus a freshly generated driver script.
            for exe in batch.all_executables():
                ctx.send_file(batch.local_exe(cwd, exe), batch.remote_exe(exe))
            batch.make_local_sh(cwd)
            ctx.send_file(batch.local_sh(cwd), batch.remote_sh())
            ctx.run("/bin/bash", batch.remote_sh())
            ctx.download_file(batch.remote_out(), batch.local_out(cwd))
            try:
                # Per-batch execution timeout on the provider, chosen by the batch.
                yield ctx.commit(timeout=timedelta(seconds=batch.timeout()))
                task.accept_result(result=batch.local_out(cwd))
            except BatchTimeoutError:
                print(
                    f"{text_colors.RED}"
                    f"Task timed out: {task.data.id}, time: {task.running_time}"
                    f"{text_colors.DEFAULT}")
                raise

    # Overall Executor timeout, in minutes, taken from the CLI arguments.
    timeout = timedelta(minutes=app.args.time_out)

    # By passing `event_consumer=log_summary()` we enable summary logging.
    # See the documentation of the `yapapi.log` module on how to set
    # the level of detail and format of the logged information.
    async with Executor(
        package=package,
        max_workers=len(batches),
        budget=10.0,
        timeout=timeout,
        subnet_tag=subnet_tag,
        event_consumer=log_summary(log_event_repr),
    ) as executor:

        async for task in executor.submit(
                worker, [Task(data=batch) for batch in batches]):
            print(
                f"{text_colors.CYAN}"
                f"Task computed: {task.data.id}, result: {task.result}, time: {task.running_time}"
                f"{text_colors.DEFAULT}")
Example #3
0
    async def run_step(self, step, timeout=timedelta(minutes=10), budget=10, subnet_tag="community.3"):
        """Execute one pipeline step on a Golem provider.

        Uploads the context archive, runs the step's commands inside the
        step's VM image, downloads the combined command log and records the
        step as successful in ``self.state``.

        :param step: step descriptor with 'name', 'image', 'commands' and an
            optional 'environment' entry.
        :param timeout: overall Executor timeout for the step.
        :param budget: GLM budget for the step.
        :param subnet_tag: Golem subnet to negotiate providers on.
        """
        package = await vm.repo(
            image_hash=step["image"],
            min_mem_gib=1,
            min_storage_gib=5.0,
        )

        async def worker(ctx: WorkContext, tasks):
            """Per-provider worker: ship the context, run commands, grab the log."""
            async for task in tasks:
                step_name = step['name']
                commands = step['commands']
                # Environment handed straight to ctx.run's `env=` keyword.
                # NOTE(review): presumably a mapping (or None) — the old
                # comment claimed a "k1=v1 ..." string; confirm with callers.
                envs = step.get('environment')
                print(f"\033[36;1mSending the context zip file: {self.tar_fname}\033[0m")
                ctx.send_file(self.tar_fname, "/golem/resource/context.zip")
                # Extract the uploaded archive on the provider.
                print(f"\033[36;1mExtracting the zip file: {self.tar_fname}\033[0m")
                ctx.run("/bin/sh", "-c", "unzip /golem/resource/context.zip")
                # Run the step's commands one by one, appending to a shared log.
                for command in commands:
                    print(f"\033[36;1mRunning {command}\033[0m")
                    ctx.run("/bin/sh", "-c", f"{command} >> /golem/output/cmd.log 2>&1", env=envs)
                log_fname = get_temp_log_file(step_name)
                # Plain string (dropped a pointless f-prefix — no placeholders).
                ctx.download_file("/golem/output/cmd.log", log_fname)
                try:
                    # Per-batch timeout on the provider — independent of the
                    # Executor-level `timeout` parameter above.
                    yield ctx.commit(timeout=timedelta(minutes=30))
                    task.accept_result(result=log_fname)
                except BatchTimeoutError:
                    print(f"Task timed out: {task}, time: {task.running_time}")
                    raise
            ctx.log("no more task to run")

        # By passing `event_consumer=log_summary()` we enable summary logging.
        # See the documentation of the `yapapi.log` module on how to set
        # the level of detail and format of the logged information.
        async with Executor(
            package=package,
            max_workers=1,
            budget=budget,
            timeout=timeout,
            subnet_tag=subnet_tag,
            event_consumer=log_summary(log_event_repr),
        ) as executor:
            async for task in executor.submit(worker, [Task(data=step)]):
                print(f"\033[36;1mStep completed: {task}\033[0m")
                # Record the downloaded log and mark the step successful.
                self.state[step['name']]['log'] = task.result
                self.state[step['name']]['state'] = StepState.SUCCESS
                self.post_progress(step['name'])
Example #4
0
async def main(args):
    """Crack a hash with hashcat distributed over Golem providers.

    First measures the keyspace size on a single provider, then splits the
    keyspace across ``args.number_of_providers`` workers and reports any
    recovered password.

    :param args: parsed CLI arguments (``hash``, ``mask``,
        ``number_of_providers``, ``subnet_tag``).
    """
    package = await vm.repo(
        image_hash="2c17589f1651baff9b82aa431850e296455777be265c2c5446c902e9",
        min_mem_gib=0.5,
        min_storage_gib=2.0,
    )

    async def worker_check_keyspace(ctx: WorkContext, tasks):
        """Run the keyspace-measuring script and download its result file."""
        async for task in tasks:
            keyspace_sh_filename = "keyspace.sh"
            ctx.send_file(keyspace_sh_filename, "/golem/work/keyspace.sh")
            ctx.run("/bin/sh", "/golem/work/keyspace.sh")
            output_file = "keyspace.txt"
            ctx.download_file("/golem/work/keyspace.txt", output_file)
            yield ctx.commit()
            task.accept_result()

    async def worker_find_password(ctx: WorkContext, tasks):
        """Search one keyspace slice; ``task.data`` is the slice's offset.

        NOTE: reads ``step`` from the enclosing scope. It is assigned below,
        after the keyspace has been computed but before this worker runs
        (late-binding closure — intentional).
        """
        ctx.send_file("in.hash", "/golem/work/in.hash")

        async for task in tasks:
            skip = task.data
            limit = skip + step

            # Commands to be run on the provider
            commands = (
                "rm -f /golem/work/*.potfile ~/.hashcat/hashcat.potfile; "
                f"touch /golem/work/hashcat_{skip}.potfile; "
                f"hashcat -a 3 -m 400 /golem/work/in.hash {args.mask} --skip={skip} --limit={limit} --self-test-disable -o /golem/work/hashcat_{skip}.potfile || true"
            )
            # (dropped a stray f-prefix on the plain "/bin/sh" literal)
            ctx.run("/bin/sh", "-c", commands)

            output_file = f"hashcat_{skip}.potfile"
            ctx.download_file(f"/golem/work/hashcat_{skip}.potfile",
                              output_file)
            yield ctx.commit()
            task.accept_result(result=output_file)

    # beginning of the main flow

    write_hash(args.hash)
    write_keyspace_check_script(args.mask)

    # By passing `event_consumer=log_summary()` we enable summary logging.
    # See the documentation of the `yapapi.log` module on how to set
    # the level of detail and format of the logged information.
    async with Executor(
        package=package,
        max_workers=args.number_of_providers,
        budget=10.0,
        # timeout should be keyspace / number of providers dependent
        timeout=timedelta(minutes=25),
        subnet_tag=args.subnet_tag,
        event_consumer=log_summary(log_event_repr),
    ) as executor:

        keyspace_computed = False
        # This is not a typical use of executor.submit as there is only one task, with no data:
        async for _task in executor.submit(worker_check_keyspace,
                                           [Task(data=None)]):
            keyspace_computed = True

        if not keyspace_computed:
            # Assume the errors have been already reported and we may return quietly.
            return

        keyspace = read_keyspace()

        print(
            f"{utils.TEXT_COLOR_CYAN}"
            f"Task computed: keyspace size count. The keyspace size is {keyspace}"
            f"{utils.TEXT_COLOR_DEFAULT}")

        step = int(keyspace / args.number_of_providers) + 1

        ranges = range(0, keyspace, step)

        # Loop variable renamed from `range` — don't shadow the builtin.
        async for task in executor.submit(
                worker_find_password, [Task(data=skip) for skip in ranges]):
            print(f"{utils.TEXT_COLOR_CYAN}"
                  f"Task computed: {task}, result: {task.result}"
                  f"{utils.TEXT_COLOR_DEFAULT}")

        password = read_password(ranges)

        if password is None:
            print(
                f"{utils.TEXT_COLOR_RED}No password found{utils.TEXT_COLOR_DEFAULT}"
            )
        else:
            print(f"{utils.TEXT_COLOR_GREEN}"
                  f"Password found: {password}"
                  f"{utils.TEXT_COLOR_DEFAULT}")
async def main():
    """Federated-learning driver: train on Golem providers, then average.

    Runs GLOBAL_TRAINING_ROUNDS rounds. Each round fans training-data slices
    out to NUM_PROVIDERS workers, downloads the per-worker weights and logs,
    federated-averages the weights into the local model, evaluates it, and
    saves the round's weights.
    """
    package = await vm.repo(
        image_hash="c0317d4db8930afde1862f27973ee2f5b766c4d50a87409406e2e23f",
        min_mem_gib=2,
        min_storage_gib=2.5,
    )

    async def worker_train_model(ctx: WorkContext, tasks):
        """Per-provider worker: fine-tune the previous round's model on a slice."""
        async for task in tasks:
            global_round = task.data['global_round']
            node_id = task.data['node_id']
            # The previous round's averaged weights are the starting point.
            model_path = os.path.join(ROUND_WEIGHTS_FOLDER,
                                      f'round_{global_round - 1}.h5')
            ctx.send_file(model_path,
                          f"/golem/work/model_{global_round - 1}.h5")
            # Training parameters read by client.py on the provider.
            specs = {
                'start': task.data['start'],
                'end': task.data['end'],
                'batch_size': BATCH_SIZE,
                'model_path': f'model_{global_round - 1}.h5',
                'epochs': PROVIDER_EPOCHS,
                'global_round': task.data['global_round'],
                'node_number': task.data['node_id']
            }
            ctx.send_json(
                "/golem/work/specs.json",
                specs,
            )
            ctx.send_file('client.py', "/golem/work/client.py")
            ctx.run("/bin/sh", "-c", "python3 client.py")
            node_model_output = f'/golem/output/model_round_{global_round}_{node_id}.h5'
            node_log_file = f'/golem/output/log_round_{global_round}_{node_id}.json'
            ctx.download_file(
                node_model_output,
                os.path.join(WORKER_MODEL_WEIGHTS_FOLDER,
                             f'round_{global_round}_worker_{node_id}.h5'))
            ctx.download_file(
                node_log_file,
                os.path.join(
                    WORKER_LOGS_FOLDER,
                    f'log_round_{global_round}_worker_{node_id}.json'))
            # NOTE(review): 7 minutes per provider batch — confirm it is enough
            # for PROVIDER_EPOCHS epochs on the chosen slice size.
            yield ctx.commit(timeout=timedelta(minutes=7))
            task.accept_result()

    print(f"{TEXT_COLOR_GREEN}"
          f"Initialising your model."
          f"{TEXT_COLOR_DEFAULT}")
    model = get_compiled_model()
    print(f"{TEXT_COLOR_GREEN}" f"Loading the data" f"{TEXT_COLOR_DEFAULT}")
    training_dataset, testing_dataset, train_length, test_length = load_dataset(
        BATCH_SIZE)
    print(f"{TEXT_COLOR_GREEN}"
          f"Initial model evaluation - "
          f"{TEXT_COLOR_DEFAULT}")
    eval_results = model.evaluate(testing_dataset)
    print(f"{TEXT_COLOR_BLUE}"
          f"ROUND 0 | Loss: {eval_results[0]} | Accuracy: {eval_results[1]}"
          f"{TEXT_COLOR_DEFAULT}")
    print(f"{TEXT_COLOR_MAGENTA}"
          f"Saving Model Weights for round 0"
          f"{TEXT_COLOR_DEFAULT}")
    # Round 0 weights: the untrained compiled model every worker starts from.
    model.save(os.path.join(ROUND_WEIGHTS_FOLDER, 'round_0.h5'))

    for global_round_number in range(1, GLOBAL_TRAINING_ROUNDS + 1):
        print(f"{TEXT_COLOR_GREEN}"
              f"Beginning Training Round {global_round_number}"
              f"{TEXT_COLOR_DEFAULT}")
        # A fresh Executor per round: all of a round's workers must finish
        # before the weights can be averaged.
        async with Executor(
                package=package,
                max_workers=NUM_PROVIDERS,
                budget=20.0,
                timeout=timedelta(minutes=29),
                subnet_tag=SUBNET_TAG,
                event_consumer=log_summary(log_event_repr),
        ) as executor:

            # No problem if we miss a few samples
            training_subset_steps = int(train_length / NUM_PROVIDERS)
            executor_tasks = [
                Task(
                    data={
                        'start': x,
                        'end': x + training_subset_steps,
                        'global_round': global_round_number,
                        'node_id': index + 1
                    }) for index, x in enumerate(
                        list(range(0, train_length, training_subset_steps)))
            ]
            async for task in executor.submit(worker_train_model,
                                              executor_tasks):
                print(
                    f"{TEXT_COLOR_CYAN}"
                    f"Training round {global_round_number} completed on provider node {task.data['node_id']}"
                    f"{TEXT_COLOR_DEFAULT}")

        # Federated averaging: combine every worker's weights for this round.
        all_worker_weights = get_client_model_weights(
            WORKER_MODEL_WEIGHTS_FOLDER, global_round_number)
        averaged_weights = federated_avg_weights(all_worker_weights)
        model.set_weights(averaged_weights)

        print(f"{TEXT_COLOR_GREEN}"
              f"TRAINING ROUND {global_round_number} complete!"
              f"{TEXT_COLOR_DEFAULT}")
        eval_results = model.evaluate(testing_dataset)
        print(
            f"{TEXT_COLOR_BLUE}"
            f"ROUND {global_round_number} | Loss: {eval_results[0]} | Accuracy: {eval_results[1]}"
            f"{TEXT_COLOR_DEFAULT}")
        print(f"{TEXT_COLOR_MAGENTA}"
              f"Saving Model Weights for round {global_round_number}"
              f"{TEXT_COLOR_DEFAULT}")
        model.save(
            os.path.join(ROUND_WEIGHTS_FOLDER,
                         f'round_{global_round_number}.h5'))
Example #6
0
async def main(subnet_tag: str):
    """Render frames of the bundled ``cubes.blend`` scene on Golem providers.

    Negotiates a Blender VM image, fans the frame indices out as tasks, and
    downloads each rendered PNG locally.

    :param subnet_tag: Golem subnet to negotiate providers on.
    """
    # VM image with Blender plus the minimum provider resources it needs.
    package = await vm.repo(
        image_hash="9a3b5d67b0b27746283cb5f287c13eab1beaa12d92a9f536b747c7ae",
        min_mem_gib=0.5,
        min_storage_gib=2.0,
    )

    async def worker(ctx: WorkContext, tasks):
        """Per-provider worker: upload the scene once, then render frames."""
        # The scene file is sent a single time per provider, before any task.
        ctx.send_file(str(script_dir / "cubes.blend"), "/golem/resource/scene.blend")
        async for task in tasks:
            frame_no = task.data
            render_params = {
                "scene_file": "/golem/resource/scene.blend",
                "resolution": (400, 300),
                "use_compositing": False,
                "crops": [{
                    "outfilebasename": "out",
                    "borders_x": [0.0, 1.0],
                    "borders_y": [0.0, 1.0]
                }],
                "samples": 100,
                "frames": [frame_no],
                "output_format": "PNG",
                "RESOURCES_DIR": "/golem/resources",
                "WORK_DIR": "/golem/work",
                "OUTPUT_DIR": "/golem/output",
            }
            ctx.send_json("/golem/work/params.json", render_params)
            ctx.run("/golem/entrypoints/run-blender.sh")
            local_png = f"output_{frame_no}.png"
            ctx.download_file(f"/golem/output/out{frame_no:04d}.png", local_png)
            try:
                # Two minutes is plenty for a single frame. On timeout this
                # worker instance is shut down and its remaining tasks,
                # including the current one, go to other providers.
                yield ctx.commit(timeout=timedelta(seconds=120))
                # TODO: Check if job results are valid
                # and reject by: task.reject_task(reason = 'invalid file')
                task.accept_result(result=local_png)
            except BatchTimeoutError:
                print(f"{utils.TEXT_COLOR_RED}"
                      f"Task timed out: {task}, time: {task.running_time}"
                      f"{utils.TEXT_COLOR_DEFAULT}")
                raise

    # Frame indices to render: 0, 10, 20, 30, 40, 50.
    frames: range = range(0, 60, 10)
    # Worst-case overhead, in minutes, for initialization (negotiation, file transfer etc.)
    # TODO: make this dynamic, e.g. depending on the size of files to transfer
    init_overhead = 3
    # Providers reject work whose timeout falls outside [5 min, 30 min]; the
    # lower bound is raised to 6 min so the demand has time to reach them.
    min_timeout, max_timeout = 6, 30

    # Two minutes per frame plus fixed overhead, clamped to the window above.
    estimate = init_overhead + len(frames) * 2
    if estimate > max_timeout:
        estimate = max_timeout
    if estimate < min_timeout:
        estimate = min_timeout
    timeout = timedelta(minutes=estimate)

    # Summary logging is enabled via `event_consumer=log_summary()`; see the
    # `yapapi.log` module docs for detail level and formatting options.
    async with Executor(
        package=package,
        max_workers=3,
        budget=10.0,
        timeout=timeout,
        subnet_tag=subnet_tag,
        event_consumer=log_summary(log_event_repr),
    ) as executor:

        task_batch = [Task(data=frame) for frame in frames]
        async for task in executor.submit(worker, task_batch):
            print(
                f"{utils.TEXT_COLOR_CYAN}"
                f"Task computed: {task}, result: {task.result}, time: {task.running_time}"
                f"{utils.TEXT_COLOR_DEFAULT}")
Example #7
0
async def main(subnet_tag, driver=None, network=None):
    """Generate data archives on Golem providers and download them locally.

    :param subnet_tag: Golem subnet to negotiate providers on.
    :param driver: payment driver name (None = yapapi default).
    :param network: payment network name (None = yapapi default).
    """
    package = await vm.repo(
        image_hash="2c5d9a80847eb147261c4e33df6e6955666ddd932ec40fd8b005f799",
        min_mem_gib=1,
        min_storage_gib=10.0,
    )

    async def worker(ctx: WorkContext, tasks):
        """Per-provider worker: run the generator script, fetch its archive."""
        # (removed an unused `script_dir` local that was never referenced)
        async for task in tasks:
            frame = task.data
            ctx.run("/bin/sh", "-c", "./generate_data.sh")
            output_file = f"output/{frame}.zip"
            # Plain string (dropped a pointless f-prefix — no placeholders).
            ctx.download_file("/golem/output/data.zip", output_file)
            try:
                # Timeout for executing the script on the provider: 1750 s is
                # just over 29 minutes, close to the 30-minute ceiling
                # providers accept. If it is exceeded, this worker instance is
                # shut down and the remaining tasks, including the current
                # one, are computed by other providers.
                yield ctx.commit(timeout=timedelta(seconds=1750))
                # TODO: Check if job results are valid
                # and reject by: task.reject_task(reason = 'invalid file')
                task.accept_result(result=output_file)
            except BatchTimeoutError:
                print(
                    f"{TEXT_COLOR_RED}"
                    f"Task {task} timed out on {ctx.provider_name}, time: {task.running_time}"
                    f"{TEXT_COLOR_DEFAULT}")
                raise

    # One task (and one downloaded archive) per index.
    frames: range = range(0, 60)
    # Worst-case overhead, in minutes, for initialization (negotiation, file transfer etc.)
    # TODO: make this dynamic, e.g. depending on the size of files to transfer
    init_overhead = 3
    # Providers will not accept work if the timeout is outside of the [5 min, 30min] range.
    # We increase the lower bound to 6 min to account for the time needed for our demand to
    # reach the providers.
    min_timeout, max_timeout = 6, 30

    # Two minutes per task plus fixed overhead, clamped to the window above.
    timeout = timedelta(
        minutes=max(min(init_overhead +
                        len(frames) * 2, max_timeout), min_timeout))

    # By passing `event_consumer=log_summary()` we enable summary logging.
    # See the documentation of the `yapapi.log` module on how to set
    # the level of detail and format of the logged information.
    async with Executor(
        package=package,
        max_workers=30,
        budget=10.0,
        timeout=timeout,
        subnet_tag=subnet_tag,
        driver=driver,
        network=network,
        event_consumer=log_summary(log_event_repr),
    ) as executor:

        sys.stderr.write(
            f"yapapi version: {TEXT_COLOR_YELLOW}{yapapi_version}{TEXT_COLOR_DEFAULT}\n"
            f"Using subnet: {TEXT_COLOR_YELLOW}{subnet_tag}{TEXT_COLOR_DEFAULT}, "
            f"payment driver: {TEXT_COLOR_YELLOW}{executor.driver}{TEXT_COLOR_DEFAULT}, "
            f"and network: {TEXT_COLOR_YELLOW}{executor.network}{TEXT_COLOR_DEFAULT}\n"
        )

        num_tasks = 0
        start_time = datetime.now()

        async for task in executor.submit(
                worker, [Task(data=frame) for frame in frames]):
            num_tasks += 1
            print(
                f"{TEXT_COLOR_CYAN}"
                f"Task computed: {task}, result: {task.result}, time: {task.running_time}"
                f"{TEXT_COLOR_DEFAULT}")

        print(
            f"{TEXT_COLOR_CYAN}"
            f"{num_tasks} tasks computed, total time: {datetime.now() - start_time}"
            f"{TEXT_COLOR_DEFAULT}")