# Imports assumed by this snippet; Executor, MaxRequestsExceeded, and
# run_coffea_processor come from the surrounding project.
import pickle

from funcx import FuncXClient
from tenacity import retry, retry_if_exception_type, wait_fixed


class FuncXExecutor(Executor):
    def __init__(self, endpoint_id, process_function="301f653b-40b6-449e-ad2e-e57d3aaa33cd"):
        self.fxc = FuncXClient(asynchronous=True)
        self.endpoint_id = endpoint_id
        self.process_function = process_function

    def run_async_analysis(self, file_url, tree_name, accumulator, process_func):
        if not self.process_function:
            self.process_function = self.fxc.register_function(run_coffea_processor)

        pickled_process_func = pickle.dumps(process_func)

        data_result = self.safe_run(file_url, tree_name,
                                    accumulator,
                                    pickled_process_func,
                                    function_id=self.process_function)

        # Pass this down to the next item in the stream.
        return data_result

    @retry(wait=wait_fixed(5), retry=retry_if_exception_type(MaxRequestsExceeded))
    def safe_run(self, file_url, tree_name, accumulator, proc, function_id):
        return self.fxc.run(file_url,
                            tree_name,
                            accumulator,
                            proc,
                            True,
                            endpoint_id=self.endpoint_id,
                            function_id=function_id)
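
A minimal driver for the executor above might look like the following; the endpoint UUID and my_processor are placeholders, and because the client is constructed with asynchronous=True, safe_run typically returns an awaitable task rather than a finished result.

# Hypothetical usage sketch -- the UUID and my_processor are placeholders.
executor = FuncXExecutor(endpoint_id='<your-endpoint-uuid>')
task = executor.run_async_analysis(
    file_url='root://host//path/to/file.root',
    tree_name='Events',
    accumulator=None,
    process_func=my_processor)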
Example #2
    def register_function(self, container_type='docker', location=None, ep_id=None, group=None):
        from funcx import FuncXClient

        assert self.extr_func is not None, "Extractor function must first be registered!"

        if location is None:
            location = self.store_url

        fxc = FuncXClient()

        container_id = fxc.register_container(
            location=location,
            container_type=container_type,
            name='kube-tabular',
            description='I don\'t think so!',
            )
        self.func_id = fxc.register_function(self.extr_func,
                                             ep_id,
                                             group=group,
                                             container_uuid=container_id,
                                             description="A sum function")

        print(f"The function has been updated! "
              f"Please copy/paste the following code into {self.func_id} function class:\n")
        print(self.func_id)
        return self.func_id
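
Registration might be driven like the following; the extractor class, both UUIDs, and the container URL are all placeholders.

# Hypothetical usage sketch -- every identifier below is a placeholder.
extractor = TabularExtractor()  # assumed to have set self.extr_func already
func_id = extractor.register_function(
    container_type='docker',
    location='docker://example/kube-tabular:latest',
    ep_id='<endpoint-uuid>',
    group='<globus-group-uuid>')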
Example #3
def _register_function():
    """Register the inference function with FuncX"""

    client = FuncXClient()

    # Get the Group UUID
    config = json.loads(_config_path.read_text())
    function_id = client.register_function(_funcx_func,
                                           group=config['group_uuid'])
    _set_config(function_id=function_id)
Example #4
    def __init__(self):
        self.current_tasks_on_ep = 0
        self.max_tasks_on_ep = file_cutoff  # IF SET TO FILE_CUTOFF, THEN THIS IS THE MAX.
        self.fxc = FuncXClient()

        self.funcx_batches = Queue()
        self.polling_queue = Queue()

        self.num_poll_reqs = 0
        self.num_send_reqs = 0

        self.total_families_sent = 0

        self.successes = 0
        self.failures = 0

        self.max_outstanding_tasks = max_outstanding_tasks

        self.family_queue = Queue()

        self.fam_batches = []

        # big_json = "/home/ubuntu/old_xtracthub-service/experiments/tyler_everything.json"
        # big_json = "/Users/tylerskluzacek/Desktop/tyler_everything.json"

        import os
        print(os.getcwd())

        #big_json = "../experiments/tyler_30k.json"
        big_json = "experiments/tyler_200k.json"
        # big_json = "/Users/tylerskluzacek/PyCharmProjects/xtracthub-service/experiments/tyler_20k.json"

        t0 = time.time()
        with open(big_json, 'r') as f:
            self.fam_list = json.load(f)

        print(f"Number of famlilies in fam_list: {len(self.fam_list)}")
        t1 = time.time()

        print(f"Time to load families: {t1-t0}")
        time.sleep(5)  # Time to read!!!

        # Transfer the stored list to a queue to promote good concurrency while making batches.
        i = 0  # TODO: added skip logic here!
        for item in self.fam_list:
            if i < skip_n:
                i += 1  # Advance the counter so only the first skip_n families are skipped.
                continue
            self.family_queue.put(item)

        self.start_time = time.time()

        self.preproc_fam_batches()

        print(f"Number of funcX batches: {self.funcx_batches.qsize()}")
Example #5
    def funcx_client(self):
        """
        :returns an authorized funcx client
        """
        # Cache under a single-underscore name: a double-underscore attribute
        # would be name-mangled, so getattr(self, '__funcx_client', None) could
        # never find it and a fresh client would be built on every call.
        if getattr(self, '_funcx_client', None) is not None:
            return self._funcx_client

        self._funcx_client = FuncXClient()
        return self._funcx_client
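
On Python 3.8+, the same build-once-and-cache behavior can come from functools.cached_property; a sketch, not the project's actual code:

from functools import cached_property
from funcx import FuncXClient

class HasFuncxClient:
    @cached_property
    def funcx_client(self) -> FuncXClient:
        """An authorized funcx client, created once on first access."""
        return FuncXClient()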
Example #6
def get_fx_client(headers):
    tokens = headers
    fx_auth = AccessTokenAuthorizer(tokens['Authorization'].replace(
        'Bearer ', ''))
    search_auth = AccessTokenAuthorizer(tokens['Search'])
    openid_auth = AccessTokenAuthorizer(tokens['Openid'])

    fxc = FuncXClient(fx_authorizer=fx_auth,
                      search_authorizer=search_auth,
                      openid_authorizer=openid_auth)
    return fxc
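
The headers argument is expected to carry three bearer tokens keyed as shown below; a hypothetical call, with placeholder token values:

headers = {
    'Authorization': 'Bearer <funcx-access-token>',
    'Search': '<globus-search-access-token>',
    'Openid': '<openid-access-token>',
}
fxc = get_fx_client(headers)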
Example #7
    def __init__(self):
        self.current_tasks_on_ep = 0

        self.max_tasks_on_ep = 90000

        self.fxc = FuncXClient()

        self.funcx_batches = Queue()
        self.polling_queue = Queue()

        self.num_poll_reqs = 0
        self.num_send_reqs = 0

        self.total_families_sent = 0

        self.successes = 0
        self.failures = 0

        self.fam_batches = []

        # NOTE: Changed away from X in order to load from CSV.
        # big_json = "/Users/tylerskluzacek/PyCharmProjects/xtracthub-service/experiments/tyler_20k.json"
        #
        # with open(big_json, 'r') as f:
        #     self.fam_list = json.load(f)

        self.image_path_list = Queue()
        with open('train2014_images.csv') as f:
            reader = csv.reader(f)
            for row in reader:
                # print(row[0])
                self.image_path_list.put(row[0])

        # exit()
        self.start_time = time.time()

        self.preproc_fam_batches()
Example #8
def main(args: Optional[List[str]] = None):
    """Launch service that automatically processes images and displays results as a web service"""

    # Make the argument parser
    parser = ArgumentParser()
    subparsers = parser.add_subparsers(
        dest='command',
        help='Which mode to launch the server in',
        required=True)

    # Add in the configuration settings
    config_parser = subparsers.add_parser(
        'config', help='Define the configuration for the server')
    config_parser.add_argument('--function-id',
                               help='UUID of the function to be run')
    config_parser.add_argument(
        '--funcx-endpoint',
        help='FuncX endpoint on which to run image processing')

    # Add in the launch setting
    start_parser = subparsers.add_parser('start',
                                         help='Launch the processing service')
    start_parser.add_argument('--model',
                              choices=['tf', 'pytorch'],
                              default='pytorch',
                              help='Which segmentation model to use')
    start_parser.add_argument('--regex',
                              default=r'.*.tiff?$',
                              help='Regex to match files')
    start_parser.add_argument(
        '--redo-existing',
        action='store_true',
        help='Submit any existing files in the directory')
    start_parser.add_argument('--local',
                              action='store_true',
                              help='Perform image analysis locally,'
                              ' instead of via FuncX')
    start_parser.add_argument('watch_dir',
                              help='Which directory to watch for new files')

    # Add in the register setting
    subparsers.add_parser('register', help='(Re)-register the funcX function')

    # Parse the input arguments
    args = parser.parse_args(args)

    # Make the logger
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=logging.INFO)

    # Handle the configuration
    if args.command == 'config':
        return _set_config(function_id=args.function_id,
                           endpoint_id=args.funcx_endpoint)
    elif args.command == 'register':
        return _register_function()

    assert args.command == 'start', f'Internal Error: The command "{args.command}" is not yet supported. Contact Logan'

    # Select the correct segmenter
    if args.model == 'tf':
        segmenter = TFSegmenter()
    elif args.model == 'pytorch':
        segmenter = PyTorchSegmenter()
    else:
        raise ValueError(f'Model type "{args.model}" is not supported yet')

    # Prepare the event handler
    if args.local:
        handler = LocalProcessingHandler(segmenter=segmenter,
                                         file_regex=args.regex)
    else:
        client = FuncXClient()
        client.max_request_size = 50 * 1024**2
        with open(_config_path, 'r') as fp:
            config = json.load(fp)
        handler = FuncXSubmitEventHandler(segmenter,
                                          client,
                                          config['function_id'],
                                          config['endpoint_id'],
                                          file_regex=args.regex)

    # Prepare the watch directory
    watch_dir = Path(args.watch_dir)
    mask_dir = watch_dir.joinpath('masks')
    mask_dir.mkdir(exist_ok=True)

    # Launch the flask app
    app.config['exec_queue'] = handler.queue
    app.config['watch_dir'] = Path(args.watch_dir)
    flask_thr = Thread(target=app.run, daemon=True, name='rtdefects.flask')
    flask_thr.start()

    # Launch the watcher
    obs = Observer()
    obs.schedule(handler, path=args.watch_dir, recursive=False)
    obs.start()

    # If desired, submit the existing files
    data_path = mask_dir.joinpath('defect-details.json')
    if args.redo_existing:
        data_path.unlink(missing_ok=True)  # Delete any existing data
        for file in watch_dir.iterdir():
            if file.is_file():
                handler.submit_file(file)

    # Wait for results to complete
    try:
        for index, (img_path, mask, defect_info,
                    rtt) in enumerate(handler.iterate_results()):
            # Report the completed result
            logger.info(
                f'Result received for {index + 1}/{handler.index}. RTT: {rtt:.2f}s.'
                f' Backlog: {handler.queue.qsize()}')

            # Save the mask to disk
            out_name = mask_dir.joinpath(img_path.name)
            with out_name.open('wb') as fp:
                fp.write(mask)
            logger.info(f'Wrote output file to: {out_name}')

            # Write out the image defect information
            defect_info['created_time'] = datetime.fromtimestamp(
                img_path.stat().st_mtime).isoformat()
            defect_info['completed_time'] = datetime.now().isoformat()
            defect_info['mask-path'] = str(out_name)
            defect_info['image-path'] = str(img_path)
            defect_info['rtt'] = rtt
            with data_path.open('a') as fp:
                print(json.dumps(defect_info), file=fp)
    except KeyboardInterrupt:
        logger.info('Detected an interrupt. Stopping system')
    except BaseException:
        obs.stop()
        logger.warning('Unexpected failure!')
        raise

    # Shut down the file reader
    obs.stop()
    obs.join()
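
Because main accepts an explicit argument list, the service can be configured and started programmatically as well as from a shell. A hypothetical invocation; the UUIDs and watch directory are placeholders:

main(['config',
      '--function-id', '<registered-function-uuid>',
      '--funcx-endpoint', '<endpoint-uuid>'])
main(['start', '--model', 'pytorch', '/data/microscope'])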
Example #9
        "--ws_uri",
        default="ws://localhost:6000",
        help="WebSocket URI to get task results",
    )
    parser.add_argument(
        "-e",
        "--endpoint_id",
        required=True,
        help="Target endpoint to send functions to",
    )
    parser.add_argument("-b",
                        "--batch",
                        action="store_true",
                        help="Enable batch or not")
    args = parser.parse_args()

    fx = FuncXExecutor(
        FuncXClient(funcx_service_address=args.service_url,
                    results_ws_uri=args.ws_uri),
        batch_enabled=args.batch,
    )

    start = time.time()
    print("Running simple test")
    test_simple(fx, args.endpoint_id)
    print(f"Complete in {time.time() - start}")

    start = time.time()
    run_loop(fx, args.endpoint_id)
    print(f"Complete in {time.time() - start}")
Example #10
class test_orch():
    def __init__(self):
        self.current_tasks_on_ep = 0

        self.max_tasks_on_ep = 90000

        self.fxc = FuncXClient()

        self.funcx_batches = Queue()
        self.polling_queue = Queue()

        self.num_poll_reqs = 0
        self.num_send_reqs = 0

        self.total_families_sent = 0

        self.successes = 0
        self.failures = 0

        self.fam_batches = []

        # NOTE: Changed away from X in order to load from CSV.
        # big_json = "/Users/tylerskluzacek/PyCharmProjects/xtracthub-service/experiments/tyler_20k.json"
        #
        # with open(big_json, 'r') as f:
        #     self.fam_list = json.load(f)

        self.image_path_list = Queue()
        with open('train2014_images.csv') as f:
            reader = csv.reader(f)
            for row in reader:
                # print(row[0])
                self.image_path_list.put(row[0])

        # exit()
        self.start_time = time.time()

        self.preproc_fam_batches()

    def path_converter(self, family_id, old_path):
        path_ls = old_path.split('/')
        file_name = path_ls[-1]
        new_path = None
        if system == "midway2":
            new_path = f"/project2/chard/skluzacek/{family_id}/{file_name}"
        elif system == "theta":
            new_path = f"/projects/CSC249ADCD01/skluzacek/data_to_process/{family_id}/{file_name}"
        return new_path

    def preproc_fam_batches(self):

        total_tasks = 0

        print("PREPROCESSING!")
        while not self.image_path_list.empty():

            fam_batch = FamilyBatch()
            # print(len(fam_batch.families))
            while len(fam_batch.families) < map_size:

                if self.image_path_list.empty():
                    break

                path = self.image_path_list.get()
                print(path)
                family = dict()

                family['family_id'] = None

                # TODO: CHANGE THIS FOR THETA.
                if system == 'midway2':
                    family['files'] = [{
                        'path':
                        f'/project2/chard/skluzacek/train2014/{path}'
                    }]
                elif system == 'theta':
                    family['files'] = [{
                        'path':
                        f'/projects/CSC249ADCD01/skluzacek/train2014/{path}'
                    }]
                family['metadata'] = dict()
                family['headers'] = None
                family['download_type'] = None
                family['groups'] = []

                empty_fam = Family()
                empty_fam.from_dict(family)
                print("ADDING FAMILY TO FAM BATCH")
                fam_batch.add_family(empty_fam)

            #if total_tasks > max_tasks:
            self.fam_batches.append(fam_batch)

        img_extractor = ImageExtractor()

        print(f"REGISTERING FUNCTION")
        self.fn_uuid = img_extractor.register_function(
            container_type=container_type,
            location=location,
            ep_id=ep_id,
            group="a31d8dce-5d0a-11ea-afea-0a53601d30b5")

        current_batch = []
        for fam_batch in self.fam_batches:
            if len(current_batch) < batch_size:
                current_batch.append(fam_batch)
            else:
                print(f"Length of current batch: {len(current_batch)}")
                self.funcx_batches.put(current_batch)
                current_batch = [fam_batch]

        # Grab the stragglers.
        if len(current_batch) > 0:
            self.funcx_batches.put(current_batch)

        print("Let me see")

        batch_counter = 0
        # while not self.funcx_batches.empty():
        #     funcx_batch = self.funcx_batches.get()
        #     batch_counter += 1
        #     for batch in funcx_batch:
        #         print(len(batch.families))
        #
        # print(batch_counter)
        #
        #
        # exit()

    # TODO: let the failures fail.
    def send_batches_thr_loop(self):
        while not self.funcx_batches.empty():

            if self.current_tasks_on_ep > self.max_tasks_on_ep:
                print(f"There are {self.current_tasks_on_ep}. Sleeping...")
                time.sleep(5)
                continue

            batch = self.funcx_batches.get()
            fx_batch = self.fxc.create_batch()

            for item in batch:

                fam_batch_size = len(item.families)

                fx_batch.add(
                    {
                        'family_batch': item,
                        'creds': None,
                        'download_file': None
                    },
                    endpoint_id=ep_id,
                    function_id=self.fn_uuid)
                self.current_tasks_on_ep += fam_batch_size

            try:
                res = self.fxc.batch_run(fx_batch)
                self.num_send_reqs += 1
            except Exception:
                time.sleep(0.5)
                continue

            num_tids = 0
            for tid in res:
                self.polling_queue.put(tid)
                num_tids += 1

            # print(f"Put {num_tids} tids into polling queue! ")
            if self.current_tasks_on_ep + self.successes > task_stop:
                # This is our unclean (approximate) way of breaking at the 'task send' stage.
                break

            # time.sleep(1)

    def polling_loop(self):
        while True:

            current_tid_batch = []
            for i in range(500):  # TODO: 1000 might be too big?
                if self.polling_queue.empty():
                    print("Polling queue empty. Creating batch!")
                    time.sleep(5)
                    break
                else:
                    tid = self.polling_queue.get()
                    current_tid_batch.append(tid)

            if len(current_tid_batch) == 0:
                print("Batch is empty. Sleeping... ")
                time.sleep(5)
                continue  # Nothing to poll yet; loop back and refill the batch.
            res = self.fxc.get_batch_status(current_tid_batch)
            self.num_poll_reqs += 1

            for item in res:

                # print(res[item])

                # print(res[item])
                if 'result' in res[item]:
                    print(res[item])
                    # self.successes += 1

                    ret_fam_batch = res[item]['result']['family_batch']

                    fam_len = len(ret_fam_batch.families)
                    self.successes += fam_len

                    self.current_tasks_on_ep -= fam_len

                    # NOTE -- we're doing nothing with the returned metadata here.

                elif 'exception' in res[item]:
                    res[item]['exception'].reraise()

                elif 'status' in res[item]:
                    self.polling_queue.put(item)
                else:
                    print("*********ERROR *************")
                    self.failures += 1
                    print(res)

    def stats_loop(self):
        while True:
            print("*********************************")
            print(f"Num successes: {self.successes}")
            print(f"Num failures: {self.failures}")
            print(f"Only {self.current_tasks_on_ep} tasks at endpoint. ")

            print(f"Number of send requests: {self.num_send_reqs}")
            print(f"Number of poll requests: {self.num_poll_reqs}")
            print("*********************************")
            print(f"Elapsed time: {time.time() - self.start_time}")
            time.sleep(5)
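
Stripped of the experiment-specific bookkeeping, the send/poll pattern the orchestrator above implements reduces to a handful of FuncXClient calls. A minimal sketch, with placeholder UUIDs and an assumed payloads list:

import time
from funcx import FuncXClient

fxc = FuncXClient()
batch = fxc.create_batch()
for payload in payloads:  # payloads: an assumed list of argument dicts
    batch.add(payload,
              endpoint_id='<endpoint-uuid>',
              function_id='<function-uuid>')
pending = set(fxc.batch_run(batch))

while pending:
    statuses = fxc.get_batch_status(list(pending))
    for tid, status in statuses.items():
        if 'result' in status or 'exception' in status:
            pending.discard(tid)  # Task finished, one way or the other.
    time.sleep(2)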
Example #11
class test_orch():
    def __init__(self):
        self.current_tasks_on_ep = 0
        self.max_tasks_on_ep = file_cutoff  # IF SET TO FILE_CUTOFF, THEN THIS IS THE MAX.
        self.fxc = FuncXClient()

        self.funcx_batches = Queue()
        self.polling_queue = Queue()

        self.num_poll_reqs = 0
        self.num_send_reqs = 0

        self.total_families_sent = 0

        self.successes = 0
        self.failures = 0

        self.max_outstanding_tasks = max_outstanding_tasks

        self.family_queue = Queue()

        self.fam_batches = []

        # big_json = "/home/ubuntu/old_xtracthub-service/experiments/tyler_everything.json"
        # big_json = "/Users/tylerskluzacek/Desktop/tyler_everything.json"

        import os
        print(os.getcwd())

        #big_json = "../experiments/tyler_30k.json"
        big_json = "experiments/tyler_200k.json"
        # big_json = "/Users/tylerskluzacek/PyCharmProjects/xtracthub-service/experiments/tyler_20k.json"

        t0 = time.time()
        with open(big_json, 'r') as f:
            self.fam_list = json.load(f)

        print(f"Number of famlilies in fam_list: {len(self.fam_list)}")
        t1 = time.time()

        print(f"Time to load families: {t1-t0}")
        time.sleep(5)  # Time to read!!!

        # Transfer the stored list to a queue to promote good concurrency while making batches.
        i = 0  # TODO: added skip logic here!
        for item in self.fam_list:
            if i < skip_n:
                i += 1  # Advance the counter so only the first skip_n families are skipped.
                continue
            self.family_queue.put(item)

        self.start_time = time.time()

        self.preproc_fam_batches()

        print(f"Number of funcX batches: {self.funcx_batches.qsize()}")
        # exit()

    def path_converter(self, family_id, old_path):
        path_ls = old_path.split('/')
        file_name = path_ls[-1]
        new_path = None
        if system == "midway2":
            new_path = f"/project2/chard/skluzacek/data_to_process/{family_id}/{file_name}"
        elif system == "theta":
            new_path = f"/projects/CSC249ADCD01/skluzacek{old_path}"  #TODO: change this for things
        elif system == "js":
            new_path = f"/home/tskluzac/{family_id}/{file_name}"
        return new_path

    def preproc_fam_batches(self):

        fam_count = 0

        # Just create an empty one out here so Python doesn't yell at me.
        fam_batch = FamilyBatch()

        num_overloads = 0
        # while we have files and haven't exceeded the weak scaling threshold (file_cutoff)
        while not self.family_queue.empty() and fam_count < file_cutoff:

            fam_batch = FamilyBatch()
            total_fam_batch_size = 0

            # Keep making batch until
            while (len(fam_batch.families) < map_size
                   and not self.family_queue.empty()
                   and fam_count < file_cutoff):

                fam_count += 1
                fam = self.family_queue.get()

                total_family_size = 0
                # First convert to the correct paths
                for file_obj in fam['files']:
                    old_path = file_obj['path']
                    new_path = self.path_converter(fam['family_id'], old_path)
                    file_obj['path'] = new_path
                    file_size = file_obj['metadata']['physical']['size']
                    total_family_size += file_size

                for group in fam['groups']:
                    for file_obj in group['files']:
                        old_path = file_obj['path']
                        new_path = self.path_converter(fam['family_id'],
                                                       old_path)
                        file_obj['path'] = new_path

                empty_fam = Family()
                empty_fam.from_dict(fam)

                # We will ONLY handle the SIZE issue in here.

                if soft_batch_bytes_max > 0:
                    # So if this last file would put us over the top,
                    if total_fam_batch_size + total_family_size > soft_batch_bytes_max:
                        num_overloads += 1
                        print(f"Num overloads {num_overloads}")
                        # then we append the old batch (if not empty),
                        if len(fam_batch.families) > 0:
                            self.fam_batches.append(fam_batch)

                        # empty the old one
                        fam_batch = FamilyBatch()
                        total_fam_batch_size = total_family_size

                        assert (len(fam_batch.families) == 0)

                # and then continue (here we either add to our prior fam_batch OR the new one).
                fam_batch.add_family(empty_fam)

            assert len(fam_batch.families) <= map_size

            self.fam_batches.append(fam_batch)

        # img_extractor = NothingExtractor()
        img_extractor = MatioExtractor()

        # TODO: ADDING TEST. Making sure we have all of our files here.

        ta = time.time()
        num_families = 0
        for item in self.fam_batches:
            num_families += len(item.families)

        print(num_families)
        tb = time.time()
        print(f"Time to move families: {tb-ta}")
        time.sleep(5)
        # exit()

        # exit()

        # This check makes sure our batches are the correct size to avoid the January 2021 disaster of having vastly
        #  incorrect numbers of batches.
        #
        #  Here we are checking that the number of families we are processing is LESS than the total number of
        #   batches times the batch size (e.g., the last batch can be full or empty), and the number of families
        #   is GREATER than the case where our last map is missing.
        #
        #
        #  This leaves a very small window for error. Could use modulus to be more exact.

        # TODO: Bring this back (but use for grouping by num. files)

        # try:
        #     assert len(self.fam_batches) * (map_size-1) <= fam_count <= len(self.fam_batches) * map_size
        # except AssertionError as e:
        #     print(f"Caught {e} after creating client batches...")
        #     print(f"Number of batches: {len(self.fam_batches)}")
        #     print(f"Family Count: {fam_count}")
        #
        #     print("Cannot continue. Exiting...")
        #     exit()

        print(f"Container type: {container_type}")
        print(f"Location: {location}")
        self.fn_uuid = img_extractor.register_function(
            container_type=container_type,
            location=location,
            ep_id=ep_id,
            group="a31d8dce-5d0a-11ea-afea-0a53601d30b5")

        # funcX batching. Here we take the 'user' FamilyBatch objects and put them into a batch we send to funcX.
        num_fx_batches = 0
        current_batch = []

        print(f"Number of family batches: {len(self.fam_batches)}")
        for fam_batch in self.fam_batches:

            # print(len(current_batch))
            # print(batch_size)

            if len(current_batch) < batch_size:
                current_batch.append(fam_batch)
            else:
                # print("Marking batch!")
                # print(len(current_batch))
                self.funcx_batches.put(current_batch)
                current_batch = [fam_batch]
                num_fx_batches += 1

        # Grab the stragglers.
        if len(current_batch) > 0:
            print("Marking batch!")
            self.funcx_batches.put(current_batch)
            num_fx_batches += 1

        # See same description as above (map example) for explanation.
        try:
            theor_full_batches = math.ceil(len(self.fam_batches) / batch_size)

            # print(f"Theoretical full batches: {}")
            assert theor_full_batches == num_fx_batches
        except AssertionError as e:
            print(f"Caught {e} after creating funcX batches...")
            print(f"Number of batches: {self.funcx_batches.qsize()}")
            print(f"Family Count: {num_fx_batches}")

            print("Cannot continue. Exiting...")
            exit()

    # TODO: let the failures fail.
    def send_batches_thr_loop(self):

        # While there are still batches to send.
        #  Note that this should not be 'limiting' as we do that in preprocessing.
        while not self.funcx_batches.empty():

            # current_tasks_on_ep = tasks_sent - tasks_received
            if self.current_tasks_on_ep > self.max_outstanding_tasks:
                print(f"There are {self.current_tasks_on_ep}. Sleeping...")
                time.sleep(5)
                continue

            # Grab one
            batch = self.funcx_batches.get()
            fx_batch = self.fxc.create_batch()

            # Now we formally pull down each funcX batch and add each of its elements to an fx_batch.
            # TODO: could do this before putting in list.
            for item in batch:

                fam_batch_size = len(item.families)

                fx_batch.add({'family_batch': item},
                             endpoint_id=ep_id,
                             function_id=self.fn_uuid)
                self.current_tasks_on_ep += fam_batch_size

            # try:
            # TODO: bring this back when we figure out what errors it's causing.
            import random
            x = random.randint(1, 5)
            time.sleep(x / 2)
            res = self.fxc.batch_run(fx_batch)
            self.num_send_reqs += 1
            # except Exception as e:
            #     print("WE CAUGHT AN EXCEPTION WHILE SENDING. ")
            #     time.sleep(0.5)
            #     continue

            for tid in res:
                self.polling_queue.put(tid)

            # import random
            # time.sleep(random.randint(1,3))
            # time.sleep(0.75)

    def polling_loop(self):
        while True:

            current_tid_batch = []

            for i in range(500):  # TODO: 1000 might be too big?

                if self.polling_queue.empty():
                    print("Polling queue empty. Creating batch!")
                    time.sleep(3)
                    break
                else:
                    tid = self.polling_queue.get()
                    current_tid_batch.append(tid)

            if len(current_tid_batch) == 0:
                print("Batch is empty. Sleeping... ")
                time.sleep(5)
                continue  # Nothing to poll yet; loop back and refill the batch.

            time.sleep(0.5)

            start_req = time.time()
            res = self.fxc.get_batch_status(current_tid_batch)
            end_req = time.time()
            self.num_poll_reqs += 1

            print(f"Time to process batch: {end_req-start_req}")

            for item in res:

                # print(res[item])
                if 'result' in res[item]:

                    print(f"Received result: {res[item]['result']}")
                    exit()

                    # print(res[item])

                    #print(res[item]['result'])

                    # ret_fam_batch = res[item]['result']['family_batch']
                    ret_fam_batch = res[item]['result']

                    num_finished = ret_fam_batch['finished']

                    print(num_finished)

                    # timer = res[item]['result']['total_time']

                    family_file_size = 0
                    bad_extract_time = 0
                    good_extract_time = 0

                    good_parsers = ""

                    # family_mdata_size = get_deep_size(ret_fam_batch)
                    #
                    # for family in ret_fam_batch.families:
                    #
                    #     # print(family.metadata)
                    #
                    #     for file in family.files:
                    #         family_file_size += file['metadata']['physical']['size']
                    #
                    #     for gid in family.groups:
                    #         g_mdata = family.groups[gid].metadata
                    #         # print(g_mdata)
                    #
                    #         if g_mdata['matio'] != {} and g_mdata['matio'] is not None:
                    #             good_parsers = good_parsers + g_mdata['parser']
                    #             good_extract_time += g_mdata['extract time']
                    #         else:
                    #             bad_extract_time = g_mdata['extract time']
                    #
                    #     # TODO: These are at the family_batch level.
                    #
                    #     import_time = res[item]['result']["import_time"]
                    #     family_fetch_time = res[item]['result']["family_fetch_time"]
                    #     file_unpack_time = res[item]['result']["file_unpack_time"]
                    #     full_extraction_loop_time = res[item]['result']["full_extract_loop_time"]

                    # import_time = 0
                    # family_fetch_time = 0
                    # file_unpack_time = 0
                    # full_extraction_loop_time = 0
                    #
                    # with open('timer_file.txt', 'a') as g:
                    #     csv_writer = csv.writer(g)
                    #     csv_writer.writerow([timer, family_file_size, family_mdata_size, good_extract_time,
                    #                         bad_extract_time, import_time, family_fetch_time, file_unpack_time,
                    #                         full_extraction_loop_time, good_parsers])

                    # fam_len = len(ret_fam_batch.families)

                    with open('timer2.txt', 'a') as g:
                        csv_writer = csv.writer(g)
                        csv_writer.writerow([time.time(), num_finished])

                    self.successes += num_finished

                    self.current_tasks_on_ep -= num_finished

                    # NOTE -- we're doing nothing with the returned metadata here.

                elif 'exception' in res[item]:
                    res[item]['exception'].reraise()

                else:
                    self.polling_queue.put(item)
                """

                else:
                    print("*********ERROR *************")
                    self.failures += 1
                    print(res)
                """

    def stats_loop(self):
        while True:
            print("*********************************")
            print(f"Num successes: {self.successes}")
            print(f"Num failures: {self.failures}")
            print(f"Only {self.current_tasks_on_ep} tasks at endpoint. ")

            print(f"Number of send requests: {self.num_send_reqs}")
            print(f"Number of poll requests: {self.num_poll_reqs}")
            print("*********************************")
            print(f"Elapsed time: {time.time() - self.start_time}")
            time.sleep(5)
Example #12
    def __init__(self,
                 endpoints,
                 strategy='round-robin',
                 runtime_predictor='rolling-average',
                 last_n=3,
                 train_every=1,
                 log_level='INFO',
                 import_model_file=None,
                 transfer_model_file=None,
                 sync_level='exists',
                 max_backups=0,
                 backup_delay_threshold=2.0,
                 *args,
                 **kwargs):
        self._fxc = FuncXClient(*args, **kwargs)

        # Initialize a transfer client
        self._transfer_manager = TransferManager(endpoints=endpoints,
                                                 sync_level=sync_level,
                                                 log_level=log_level)

        # Info about FuncX endpoints we can execute on
        self._endpoints = endpoints
        self._dead_endpoints = set()
        self.last_result_time = defaultdict(float)
        self.temperature = defaultdict(lambda: 'WARM')
        self._imports = defaultdict(list)
        self._imports_required = defaultdict(list)

        # Track which endpoints a function can't run on
        self._blocked = defaultdict(set)

        # Track pending tasks
        # We will provide the client our own task ids, since we may submit the
        # same task multiple times to the FuncX service, and sometimes we may
        # wait to submit a task to FuncX (e.g., wait for a data transfer).
        self._task_id_translation = {}
        self._pending = {}
        self._pending_by_endpoint = defaultdict(set)
        self._task_info = {}
        # List of endpoints a (virtual) task was scheduled to
        self._endpoints_sent_to = defaultdict(list)
        self.max_backups = max_backups
        self.backup_delay_threshold = backup_delay_threshold
        self._latest_status = {}
        self._last_task_ETA = defaultdict(float)
        # Maximum ETA, if any, of a task which we allow to be scheduled on an
        # endpoint. This is to prevent backfill tasks to be longer than the
        # estimated time for when a pending data transfer will finish.
        self._transfer_ETAs = defaultdict(dict)
        # Estimated error in the pending-task time of an endpoint.
        # Updated every time a task result is received from an endpoint.
        self._queue_error = defaultdict(float)

        # Set logging levels
        logger.setLevel(log_level)
        self.execution_log = []

        # Initialize serializer
        self.fx_serializer = FuncXSerializer()
        self.fx_serializer.use_custom('03\n', 'code')

        # Initialize runtime predictor
        self.runtime = init_runtime_predictor(runtime_predictor,
                                              endpoints=endpoints,
                                              last_n=last_n,
                                              train_every=train_every)
        logger.info(f"Runtime predictor using strategy {self.runtime}")

        # Initialize transfer-time predictor
        self.transfer_time = TransferPredictor(endpoints=endpoints,
                                               train_every=train_every,
                                               state_file=transfer_model_file)

        # Initialize import-time predictor
        self.import_predictor = ImportPredictor(endpoints=endpoints,
                                                state_file=import_model_file)

        # Initialize scheduling strategy
        self.strategy = init_strategy(strategy,
                                      endpoints=endpoints,
                                      runtime_predictor=self.runtime,
                                      queue_predictor=self.queue_delay,
                                      cold_start_predictor=self.cold_start,
                                      transfer_predictor=self.transfer_time)
        logger.info(f"Scheduler using strategy {self.strategy}")

        # Start thread to check on endpoints regularly
        self._endpoint_watchdog = Thread(target=self._check_endpoints)
        self._endpoint_watchdog.start()

        # Start thread to monitor tasks and send tasks to FuncX service
        self._scheduled_tasks = Queue()
        self._task_watchdog_sleep = 0.15
        self._task_watchdog = Thread(target=self._monitor_tasks)
        self._task_watchdog.start()
Example #13
class CentralScheduler(object):
    def __init__(self,
                 endpoints,
                 strategy='round-robin',
                 runtime_predictor='rolling-average',
                 last_n=3,
                 train_every=1,
                 log_level='INFO',
                 import_model_file=None,
                 transfer_model_file=None,
                 sync_level='exists',
                 max_backups=0,
                 backup_delay_threshold=2.0,
                 *args,
                 **kwargs):
        self._fxc = FuncXClient(*args, **kwargs)

        # Initialize a transfer client
        self._transfer_manager = TransferManager(endpoints=endpoints,
                                                 sync_level=sync_level,
                                                 log_level=log_level)

        # Info about FuncX endpoints we can execute on
        self._endpoints = endpoints
        self._dead_endpoints = set()
        self.last_result_time = defaultdict(float)
        self.temperature = defaultdict(lambda: 'WARM')
        self._imports = defaultdict(list)
        self._imports_required = defaultdict(list)

        # Track which endpoints a function can't run on
        self._blocked = defaultdict(set)

        # Track pending tasks
        # We will provide the client our own task ids, since we may submit the
        # same task multiple times to the FuncX service, and sometimes we may
        # wait to submit a task to FuncX (e.g., wait for a data transfer).
        self._task_id_translation = {}
        self._pending = {}
        self._pending_by_endpoint = defaultdict(set)
        self._task_info = {}
        # List of endpoints a (virtual) task was scheduled to
        self._endpoints_sent_to = defaultdict(list)
        self.max_backups = max_backups
        self.backup_delay_threshold = backup_delay_threshold
        self._latest_status = {}
        self._last_task_ETA = defaultdict(float)
        # Maximum ETA, if any, of a task which we allow to be scheduled on an
        # endpoint. This is to prevent backfill tasks to be longer than the
        # estimated time for when a pending data transfer will finish.
        self._transfer_ETAs = defaultdict(dict)
        # Estimated error in the pending-task time of an endpoint.
        # Updated every time a task result is received from an endpoint.
        self._queue_error = defaultdict(float)

        # Set logging levels
        logger.setLevel(log_level)
        self.execution_log = []

        # Initialize serializer
        self.fx_serializer = FuncXSerializer()
        self.fx_serializer.use_custom('03\n', 'code')

        # Initialize runtime predictor
        self.runtime = init_runtime_predictor(runtime_predictor,
                                              endpoints=endpoints,
                                              last_n=last_n,
                                              train_every=train_every)
        logger.info(f"Runtime predictor using strategy {self.runtime}")

        # Initialize transfer-time predictor
        self.transfer_time = TransferPredictor(endpoints=endpoints,
                                               train_every=train_every,
                                               state_file=transfer_model_file)

        # Initialize import-time predictor
        self.import_predictor = ImportPredictor(endpoints=endpoints,
                                                state_file=import_model_file)

        # Initialize scheduling strategy
        self.strategy = init_strategy(strategy,
                                      endpoints=endpoints,
                                      runtime_predictor=self.runtime,
                                      queue_predictor=self.queue_delay,
                                      cold_start_predictor=self.cold_start,
                                      transfer_predictor=self.transfer_time)
        logger.info(f"Scheduler using strategy {self.strategy}")

        # Start thread to check on endpoints regularly
        self._endpoint_watchdog = Thread(target=self._check_endpoints)
        self._endpoint_watchdog.start()

        # Start thread to monitor tasks and send tasks to FuncX service
        self._scheduled_tasks = Queue()
        self._task_watchdog_sleep = 0.15
        self._task_watchdog = Thread(target=self._monitor_tasks)
        self._task_watchdog.start()

    def block(self, func, endpoint):
        if endpoint not in self._endpoints:
            logger.error('Cannot block unknown endpoint {}'.format(endpoint))
            return {
                'status': 'Failed',
                'reason': 'Unknown endpoint {}'.format(endpoint)
            }
        elif len(self._blocked[func]) == len(self._endpoints) - 1:
            logger.error(
                'Cannot block last remaining endpoint {}'.format(endpoint))
            return {
                'status': 'Failed',
                'reason': 'Cannot block all endpoints for {}'.format(func)
            }
        else:
            logger.info('Blocking endpoint {} for function {}'.format(
                endpoint_name(endpoint), func))
            self._blocked[func].add(endpoint)
            return {'status': 'Success'}

    def register_imports(self, func, imports):
        logger.info('Registered function {} with imports {}'.format(
            func, imports))
        self._imports_required[func] = imports

    def batch_submit(self, tasks, headers):
        # TODO: smarter scheduling for batch submissions

        task_ids = []
        endpoints = []

        for func, payload in tasks:
            _, ser_kwargs = self.fx_serializer.unpack_buffers(payload)
            kwargs = self.fx_serializer.deserialize(ser_kwargs)
            files = kwargs['_globus_files']

            task_id, endpoint = self._schedule_task(func=func,
                                                    payload=payload,
                                                    headers=headers,
                                                    files=files)
            task_ids.append(task_id)
            endpoints.append(endpoint)

        return task_ids, endpoints

    def _schedule_task(self, func, payload, headers, files, task_id=None):

        # If this is the first time scheduling this task_id
        # (i.e., non-backup task), record the necessary metadata
        if task_id is None:
            # Create (fake) task id to return to client
            task_id = str(uuid.uuid4())

            # Store task information
            self._task_id_translation[task_id] = set()

            # Information required to schedule the task, now and in the future
            info = {
                'function_id': func,
                'payload': payload,
                'headers': headers,
                'files': files,
                'time_requested': time.time()
            }
            self._task_info[task_id] = info

        # TODO: do not choose a dead endpoint (reliably)
        # exclude = self._blocked[func] | self._dead_endpoints | set(self._endpoints_sent_to[task_id])  # noqa
        if len(self._dead_endpoints) > 0:
            logger.warn('{} endpoints seem dead. Hope they still work!'.format(
                len(self._dead_endpoints)))
        exclude = self._blocked[func] | set(self._endpoints_sent_to[task_id])
        choice = self.strategy.choose_endpoint(
            func,
            payload=payload,
            files=files,
            exclude=exclude,
            transfer_ETAs=self._transfer_ETAs)  # noqa
        endpoint = choice['endpoint']
        logger.info('Choosing endpoint {} for func {}, task id {}'.format(
            endpoint_name(endpoint), func, task_id))
        choice['ETA'] = self.strategy.predict_ETA(func,
                                                  endpoint,
                                                  payload,
                                                  files=files)

        # Start Globus transfer of required files, if any
        if len(files) > 0:
            transfer_num = self._transfer_manager.transfer(
                files, endpoint, task_id)
            if transfer_num is not None:
                transfer_ETA = time.time() + self.transfer_time(
                    files, endpoint)
                self._transfer_ETAs[endpoint][transfer_num] = transfer_ETA
        else:
            transfer_num = None
            # Record endpoint ETA for queue-delay prediction here,
            # since task will be immediately scheduled
            self._last_task_ETA[endpoint] = choice['ETA']

        # If a cold endpoint is being started, mark it as no longer cold,
        # so that subsequent launch-time predictions are correct (i.e., 0)
        if self.temperature[endpoint] == 'COLD':
            self.temperature[endpoint] = 'WARMING'
            logger.info(
                'A cold endpoint {} was chosen; marked as warming.'.format(
                    endpoint_name(endpoint)))

        # Schedule task for sending to FuncX
        self._endpoints_sent_to[task_id].append(endpoint)
        self._scheduled_tasks.put((task_id, endpoint, transfer_num))

        return task_id, endpoint

    def translate_task_id(self, task_id):
        return self._task_id_translation[task_id]

    def log_status(self, real_task_id, data):
        if real_task_id not in self._pending:
            logger.warn('Ignoring unknown task id {}'.format(real_task_id))
            return

        task_id = self._pending[real_task_id]['task_id']
        func = self._pending[real_task_id]['function_id']
        endpoint = self._pending[real_task_id]['endpoint_id']
        # Don't overwrite latest status if it is a result/exception
        if task_id not in self._latest_status or \
                self._latest_status[task_id].get('status') == 'PENDING':
            self._latest_status[task_id] = data

        if 'result' in data:
            result = self.fx_serializer.deserialize(data['result'])
            runtime = result['runtime']
            name = endpoint_name(endpoint)
            logger.info('Got result from {} for task {} with time {}'.format(
                name, real_task_id, runtime))

            self.runtime.update(self._pending[real_task_id], runtime)
            self._pending[real_task_id]['runtime'] = runtime
            self._record_completed(real_task_id)
            self.last_result_time[endpoint] = time.time()
            self._imports[endpoint] = result['imports']

        elif 'exception' in data:
            exception = self.fx_serializer.deserialize(data['exception'])
            try:
                exception.reraise()
            except Exception as e:
                logger.error('Got exception on task {}: {}'.format(
                    real_task_id, e))
                exc_type, _, _ = sys.exc_info()
                if exc_type in BLOCK_ERRORS:
                    self.block(func, endpoint)

            self._record_completed(real_task_id)
            self.last_result_time[endpoint] = time.time()

        elif 'status' in data and data['status'] == 'PENDING':
            pass

        else:
            logger.error('Unexpected status message: {}'.format(data))

    def get_status(self, task_id):
        if task_id not in self._task_id_translation:
            logger.warn('Unknown client task id {}'.format(task_id))

        elif len(self._task_id_translation[task_id]) == 0:
            return {'status': 'PENDING'}  # Task has not been scheduled yet

        elif task_id not in self._latest_status:
            return {'status': 'PENDING'}  # Status has not been queried yet

        else:
            return self._latest_status[task_id]

    def queue_delay(self, endpoint):
        # Otherwise, queue delay is the ETA of most recent task,
        # plus the estimated error in the ETA prediction.
        # Note that if there are no pending tasks on endpoint, no queue delay.
        # This is implicit since, in this case, both summands will be 0.
        delay = self._last_task_ETA[endpoint] + self._queue_error[endpoint]
        return max(delay, time.time())
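
    # Worked example (hypothetical numbers): if the newest task sent to an
    # endpoint has ETA = t0 + 12.0 s and recent predictions have run ~1.5 s
    # late (the _queue_error term), queue_delay returns max(t0 + 13.5, now),
    # i.e. new work is not predicted to start before the backlog drains.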

    def _record_completed(self, real_task_id):
        info = self._pending[real_task_id]
        endpoint = info['endpoint_id']

        # If this is the last pending task on this endpoint, reset ETA offset
        if len(self._pending_by_endpoint[endpoint]) == 1:
            self._last_task_ETA[endpoint] = 0.0
            self._queue_error[endpoint] = 0.0
        else:
            prediction_error = time.time() - self._pending[real_task_id]['ETA']
            self._queue_error[endpoint] = prediction_error
            # print(colored(f'Prediction error {prediction_error}', 'red'))

        info['ATA'] = time.time()
        del info['headers']
        self.execution_log.append(info)

        logger.info(
            'Task exec time: expected = {:.3f}, actual = {:.3f}'.format(
                info['ETA'] - info['time_sent'],
                time.time() - info['time_sent']))
        # logger.info(f'ETA_offset = {self._queue_error[endpoint]:.3f}')

        # Stop tracking this task
        del self._pending[real_task_id]
        self._pending_by_endpoint[endpoint].remove(real_task_id)
        if info['task_id'] in self._task_info:
            del self._task_info[info['task_id']]

    def cold_start(self, endpoint, func):
        # If endpoint is warm, there is no launch time
        if self.temperature[endpoint] != 'COLD':
            launch_time = 0.0
        # Otherwise, return the launch time in the endpoint config
        elif 'launch_time' in self._endpoints[endpoint]:
            launch_time = self._endpoints[endpoint]['launch_time']
        else:
            logger.warn(
                'Endpoint {} should always be warm, but is cold'.format(
                    endpoint_name(endpoint)))
            launch_time = 0.0

        # Time to import dependencies
        import_time = 0.0
        for pkg in self._imports_required[func]:
            if pkg not in self._imports[endpoint]:
                logger.debug(
                    'Cold-start has import time for pkg {} on {}'.format(
                        pkg, endpoint_name(endpoint)))
                import_time += self.import_predictor(pkg, endpoint)

        return launch_time + import_time

    def _monitor_tasks(self):
        logger.info('Starting task-watchdog thread')

        scheduled = {}

        while True:

            time.sleep(self._task_watchdog_sleep)

            # Get newly scheduled tasks
            while True:
                try:
                    task_id, end, num = self._scheduled_tasks.get_nowait()
                    if task_id not in self._task_info:
                        logger.warn(
                            'Task id {} scheduled but no info found'.format(
                                task_id))
                        continue
                    info = self._task_info[task_id]
                    scheduled[task_id] = dict(info)  # Create new copy of info
                    scheduled[task_id]['task_id'] = task_id
                    scheduled[task_id]['endpoint_id'] = end
                    scheduled[task_id]['transfer_num'] = num
                except Empty:
                    break

            # Filter out all tasks whose data transfer has not been completed
            ready_to_send = set()
            for task_id, info in scheduled.items():
                transfer_num = info['transfer_num']
                if transfer_num is None:
                    ready_to_send.add(task_id)
                    info['transfer_time'] = 0.0
                elif self._transfer_manager.is_complete(transfer_num):
                    ready_to_send.add(task_id)
                    del self._transfer_ETAs[info['endpoint_id']][transfer_num]
                    info['transfer_time'] = \
                        self._transfer_manager.get_transfer_time(transfer_num)  # noqa
                else:  # This task cannot be scheduled yet
                    continue

            if len(ready_to_send) == 0:
                logger.debug('No new tasks to send. Task watchdog sleeping...')
                continue

            # TODO: different clients send different headers. change eventually
            headers = list(scheduled.values())[0]['headers']

            logger.info('Scheduling a batch of {} tasks'.format(
                len(ready_to_send)))

            # Submit all ready tasks to FuncX
            data = {'tasks': []}
            for task_id in ready_to_send:
                info = scheduled[task_id]
                submit_info = (info['function_id'], info['endpoint_id'],
                               info['payload'])
                data['tasks'].append(submit_info)

            res_str = requests.post(f'{FUNCX_API}/submit',
                                    headers=headers,
                                    data=json.dumps(data))
            try:
                res = res_str.json()
            except ValueError:
                logger.error(f'Could not parse JSON from {res_str.text}')
                continue
            if res['status'] != 'Success':
                logger.error(
                    'Could not send tasks to FuncX. Got response: {}'.format(
                        res))
                continue

            # Update task info with submission info
            for task_id, real_task_id in zip(ready_to_send, res['task_uuids']):
                info = scheduled[task_id]
                # This ETA calculation does not take into account transfer time
                # since, at this point, the transfer has already completed.
                info['ETA'] = self.strategy.predict_ETA(
                    info['function_id'], info['endpoint_id'], info['payload'])
                # Record if this ETA prediction is "reliable". If it is not
                # (e.g., when we have not learned about this (func, ep) pair),
                # backup tasks will not be sent for this task if it is delayed.
                info['is_ETA_reliable'] = self.runtime.has_learned(
                    info['function_id'], info['endpoint_id'])

                info['time_sent'] = time.time()

                endpoint = info['endpoint_id']
                self._task_id_translation[task_id].add(real_task_id)

                self._pending[real_task_id] = info
                self._pending_by_endpoint[endpoint].add(real_task_id)

                # Record endpoint ETA for queue-delay prediction
                self._last_task_ETA[endpoint] = info['ETA']

                logger.info(
                    'Sent task id {} to {} with real task id {}'.format(
                        task_id, endpoint_name(endpoint), real_task_id))

            # Stop tracking all newly sent tasks
            for task_id in ready_to_send:
                del scheduled[task_id]

    def _check_endpoints(self):
        logger.info('Starting endpoint-watchdog thread')

        while True:
            for end in self._endpoints.keys():
                statuses = self._fxc.get_endpoint_status(end)
                if len(statuses) == 0:
                    logger.warning(
                        'Endpoint {} does not have any statuses'.format(
                            endpoint_name(end)))
                else:
                    status = statuses[0]  # Most recent endpoint status

                    # Mark endpoint as dead/alive based on heartbeat's age
                    # Heartbeats are delayed when an endpoint is executing
                    # tasks, so take into account last execution too
                    age = time.time() - max(status['timestamp'],
                                            self.last_result_time[end])
                    is_dead = end in self._dead_endpoints
                    if not is_dead and age > HEARTBEAT_THRESHOLD:
                        self._dead_endpoints.add(end)
                        logger.warning(
                            'Endpoint {} seems to have died! '
                            'Last heartbeat was {:.2f} seconds ago.'.format(
                                endpoint_name(end), age))
                    elif is_dead and age <= HEARTBEAT_THRESHOLD:
                        self._dead_endpoints.remove(end)
                        logger.warning(
                            'Endpoint {} is back alive! '
                            'Last heartbeat was {:.2f} seconds ago.'.format(
                                endpoint_name(end), age))

                    # Mark endpoint as "cold" or "warm" depending on if it
                    # has active managers (nodes) allocated to it
                    if self.temperature[end] == 'WARM' \
                            and status['active_managers'] == 0:
                        self.temperature[end] = 'COLD'
                        logger.info('Endpoint {} is cold!'.format(
                            endpoint_name(end)))
                    elif self.temperature[end] != 'WARM' \
                            and status['active_managers'] > 0:
                        self.temperature[end] = 'WARM'
                        logger.info('Endpoint {} is warm again!'.format(
                            endpoint_name(end)))

            # Send backup tasks if needed
            self._send_backups_if_needed()

            # Sleep before checking statuses again
            time.sleep(5)

    def _send_backups_if_needed(self):
        # Get all tasks which have not been completed yet and still have a
        # pending (real) task on a dead endpoint
        task_ids = {
            self._pending[real_task_id]['task_id']
            for endpoint in self._dead_endpoints
            for real_task_id in self._pending_by_endpoint[endpoint]
            if self._pending[real_task_id]['task_id'] in self._task_info
        }

        # Get all tasks that had ETA predictions but still have not
        # completed, even past their ETA
        for real_task_id, info in self._pending.items():
            # If the predicted ETA wasn't reliable, don't send backups
            if not info['is_ETA_reliable']:
                continue

            expected = info['ETA'] - info['time_sent']
            elapsed = time.time() - info['time_sent']

            if elapsed / expected > self.backup_delay_threshold:
                task_ids.add(info['task_id'])

        for task_id in task_ids:
            if len(self._endpoints_sent_to[task_id]) > self.max_backups:
                logger.debug(f'Skipping sending new backup task for {task_id}')
            else:
                logger.info(f'Sending new backup task for {task_id}')
                info = self._task_info[task_id]
                self._schedule_task(info['function_id'], info['payload'],
                                    info['headers'], info['files'], task_id)
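
The straggler test above reduces to a single ratio: a backup task is issued once the elapsed time exceeds backup_delay_threshold times the predicted runtime. A minimal self-contained sketch of that check; the threshold and timestamps are illustrative, not values from the scheduler:

# Sketch of the straggler check in _send_backups_if_needed; the
# threshold and timestamps below are illustrative only.
import time

backup_delay_threshold = 2.0                # back up tasks that are 2x late
info = {'time_sent': time.time() - 60.0,    # submitted a minute ago
        'ETA': time.time() - 40.0}          # predicted to finish 40s ago

expected = info['ETA'] - info['time_sent']  # predicted runtime: 20s
elapsed = time.time() - info['time_sent']   # actual wait so far: 60s

if elapsed / expected > backup_delay_threshold:
    print('Task counts as a straggler; a backup would be scheduled.')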
Beispiel #15
0
        sys.exit(1)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-e",
        "--endpoint_id",
        required=True,
        help="Target endpoint to send functions to",
    )
    parser.add_argument(
        "-v",
        "--ep_version",
        required=True,
        help="EP VERSION",
    )
    parser.add_argument(
        "-w",
        "--worker_version",
        required=True,
        help="Target endpoint to send functions to",
    )

    args = parser.parse_args()

    fx = FuncXExecutor(FuncXClient())
    test_worker_version(fx, args.endpoint_id, args.ep_version, args.worker_version)
    test_app_exception(fx, args.endpoint_id, args.ep_version, args.worker_version)
    test_kill_manager(fx, args.endpoint_id, args.ep_version, args.worker_version)
Beispiel #16
0
from extractors.utils.base_event import create_event


"""
This script will run the Gladier team's XPCS script on each 
file from the 2021-1 file set on Petrel. It will do so on Theta. 
"""

# extractors =

# TODO:
# 1. Point to the right xpcs_data file.
# 2. Point to the right bunch of metadata files.

fxc = FuncXClient()
# xpcs_x = XPCSExtractor()
# xpcs_x = NetCDFExtractor()
# xpcs_x = JsonXMLExtractor()
# xpcs_x = HDFExtractor()
# xpcs_x = ImagesExtractor()
# xpcs_x = KeywordExtractor()
# xpcs_x = PythonExtractor()
# xpcs_x = TabularExtractor()
xpcs_x = CCodeExtractor()
# xpcs_x = TikaExtractor()

ep_id = "2293034e-4c9f-459c-a6f0-0ed310a8e618"
extractor_name = "matio"
repo_name = "mdf"
Beispiel #18
0
    )
    parser.add_argument(
        "-e",
        "--endpoint_id",
        required=True,
        help="Target endpoint to send functions to",
    )
    parser.add_argument(
        "-d", "--debug", action="store_true", help="Enable debug output"
    )
    args = parser.parse_args()

    endpoint_id = args.endpoint_id

    # set_stream_logger()
    fx = FuncXExecutor(FuncXClient(funcx_service_address=args.service_url))

    print("In main")
    future = fx.submit(double, 5, endpoint_id=endpoint_id)
    print("Got future back : ", future)

    for _i in range(5):
        time.sleep(0.2)
        # Non-blocking check whether future is done
        print("Is the future done? :", future.done())

    print("Blocking for result")
    x = future.result()  # <--- This is a blocking call
    print("Result : ", x)
Beispiel #19
0
    # 'xtract-keyword/xtract-keyword.img',
    # 'xtract-images/xtract-images.img',
    # 'xtract-jsonxml/xtract-jsonxml.img',
    'xtract-hdf/xtract-hdf.img',
    # 'xtract-netcdf/xtract-netcdf.img'
]


def hello_container(event):
    import os
    return f"Container version: {os.environ['container_version']}"


for container in all_containers:
    print(f"Using funcX version: {funcx.__version__}")
    fxc = FuncXClient()
    base_path = '/home/tskluzac/ext_repos/'
    container_path = os.path.join(base_path, container)
    print(f"Container path: {container_path}")
    container_uuid = fxc.register_container(container_path, 'singularity')

    fn_uuid = fxc.register_function(
        hdf_extract,
        container_uuid=container_uuid,
        description="New sum function defined without string spec")

    print(f"FN UUID: {fn_uuid}")
    res = fxc.run(sample_hdf_1, endpoint_id=js_ep_id, function_id=fn_uuid)
    print(res)
    for i in range(100):
        # Poll until the task completes (resolving the original TODO:
        # "break when successful"); get_result raises while still pending.
        try:
            print(fxc.get_result(res))
            break
        except Exception:
            time.sleep(2)  # assumes time was imported in the truncated header
Beispiel #20
0
    my_update_mpnn = update_wrapper(my_update_mpnn, update_mpnn)

    my_retrain_mpnn = partial(retrain_mpnn,
                              num_epochs=args.num_epochs,
                              learning_rate=args.learning_rate,
                              bootstrap=True,
                              timeout=2700)
    my_retrain_mpnn = update_wrapper(my_retrain_mpnn, retrain_mpnn)

    my_run_simulation = partial(run_simulation,
                                n_nodes=args.nodes_per_task,
                                spec=args.qc_specification)
    my_run_simulation = update_wrapper(my_run_simulation, run_simulation)

    # Create the task servers
    fx_client = FuncXClient()
    task_map = dict(
        (f, args.ml_endpoint)
        for f in [my_evaluate_mpnn, my_update_mpnn, my_retrain_mpnn])
    task_map[my_run_simulation] = args.qc_endpoint
    doer = FuncXTaskServer(task_map, fx_client, server_queues)

    # Configure the "thinker" application
    thinker = Thinker(client_queues, database, args.search_space,
                      args.search_size, args.retrain_frequency,
                      args.retrain_from_scratch, models,
                      args.molecules_per_ml_task, args.num_qc_workers,
                      args.qc_specification, out_dir, args.beta,
                      args.pause_during_update, ps_names)
    logging.info('Created the method server and task generator')
Beispiel #21
0
from funcx import FuncXClient
from sx_multi import run_coffea_processor

fxc = FuncXClient()
container_id = fxc.register_container(
    "bengal1/funcx_coffea:add_schema_to_notebooks", "docker",
    "Coffea Processor")
function_id = fxc.register_function(
    run_coffea_processor,
    description="Run your coffea process code in a setup environment",
    container_uuid=container_id)
print("Function_ID is ", function_id)
Beispiel #22
0
@pytest.fixture  # presumed fixture; assumes pytest and FuncXClient imported above
def fxc(fxc_args):
    fxc = FuncXClient(**fxc_args)
    return fxc
Beispiel #23
0
class AbyssOrchestrator:
    def __init__(self,
                 abyss_id: str,
                 globus_source_eid: str,
                 transfer_token: str,
                 compressed_files: List[Dict],
                 worker_params: List[Dict],
                 psql_conn,
                 s3_conn,
                 grouper="",
                 batcher="mmd",
                 dispatcher="fifo",
                 prediction_mode="ml"):
        """Abyss orchestrator class.
        Parameters
        ----------
        abyss_id : str
            Abyss ID for orchestration.
        globus_source_eid : str
            Globus endpoint of source data storage.
        transfer_token : str
            Globus token to authorize transfers between endpoints.
        compressed_files : list(dict)
            List of dictionaries for compressed files to process.
            Dictionaries contain "file_path" and "compressed_size".
        worker_params : list(dict)
            List of valid worker parameter dictionaries to create
            workers.
        psql_conn :
            PostgreSQL connection object to update status.
        s3_conn :
            S3 connection object to push consolidated metadata to S3.
        grouper : str
            Name of grouper to use when crawling.
        batcher : str
            Name of batcher to use.
        dispatcher : str
            Name of dispatcher to use.
        prediction_mode: str
            Mode of prediction to use to predict decompressed file size.
            "ml" to use machine learning method or "header" to use
            metadata stored in the header of compressed files (where
            possible).
        """
        self.abyss_id = abyss_id
        self.globus_source_eid = globus_source_eid
        self.transfer_token = transfer_token
        self.grouper = grouper
        self.prediction_mode = prediction_mode

        self.worker_dict = dict()
        for worker_param in worker_params:
            worker = Worker.from_dict(worker_param)
            self.worker_dict[worker.worker_id] = worker

        self.prefetchers = dict()
        for worker in self.worker_dict.values():
            globus_dest_eid = worker.globus_eid
            transfer_dir = worker.transfer_dir
            prefetcher = GlobusPrefetcher(self.transfer_token,
                                          self.globus_source_eid,
                                          globus_dest_eid, transfer_dir, 4)

            self.prefetchers[worker.worker_id] = prefetcher

        self.predictors = dict()
        for file_type, predictor in FILE_PREDICTOR_MAPPING.items():
            file_predictor = predictor()
            file_predictor.load_models()
            self.predictors[file_type] = file_predictor

        self.job_statuses = {status: Queue() for status in JobStatus}
        unpredicted_set = self.job_statuses[JobStatus.UNPREDICTED]
        for compressed_file in compressed_files:
            job = Job.from_dict(compressed_file)
            job.status = JobStatus.UNPREDICTED
            job.file_id = str(uuid.uuid4())
            job.decompressed_size = 0
            unpredicted_set.put(job)
            logger.info(
                f"LATENCY PLACING {job.file_id} INTO UNPREDICTED AT {time.time()}"
            )

        self.scheduler = Scheduler(batcher, dispatcher,
                                   list(self.worker_dict.values()), [])
        self.worker_queues = dict()

        self.psql_conn = psql_conn
        self.abyss_metadata = []
        self.s3_conn = s3_conn

        self._unpredicted_preprocessing_thread = threading.Thread(
            target=self._unpredicted_preprocessing, daemon=True)
        self._predictor_thread = threading.Thread(
            target=self._predict_decompressed_size, daemon=True)
        self._scheduler_thread = threading.Thread(
            target=self._thread_schedule_jobs, daemon=True)
        self._prefetcher_thread = threading.Thread(
            target=self._thread_prefetch, daemon=True)
        self._prefetcher_poll_thread = threading.Thread(
            target=self._thread_poll_prefetch, daemon=True)
        self._funcx_process_headers_thread = threading.Thread(
            target=self._thread_funcx_process_headers, daemon=True)
        self._funcx_decompress_thread = threading.Thread(
            target=self._thread_funcx_decompress, daemon=True)
        self._funcx_crawl_thread = threading.Thread(
            target=self._thread_funcx_crawl, daemon=True)
        self._funcx_poll_thread = threading.Thread(
            target=self._thread_funcx_poll, daemon=True)
        self._consolidate_results_thread = threading.Thread(
            target=self._thread_consolidate_crawl_results, daemon=True)
        self._lock = threading.Lock()
        self.thread_statuses = {
            "predictor_thread": True,
            "scheduler_thread": True,
            "prefetcher_thread": True,
            "prefetcher_poll_thread": True,
            "funcx_decompress_thread": True,
            "funcx_crawl_thread": True,
            "funcx_poll_thread": True,
            "consolidate_results_thread": True
        }

        self.funcx_client = FuncXClient()
        self.kill_status = False
        self.crawl_results = Queue()

    @staticmethod
    def validate_dict_params(orchestrator_params: Dict) -> None:
        """Ensures dictionary of orchestrator parameters contains
        necessary parameters.
        Parameters
        ----------
        orchestrator_params : dict
            Dictionary containing parameters for AbyssOrchestrator
            object.
        Returns
        -------
            Returns None if parameters are valid, raises error if
            invalid.
        """
        try:
            for parameter_name, parameter_type in REQUIRED_ORCHESTRATOR_PARAMETERS:
                parameter = orchestrator_params[parameter_name]
                assert isinstance(parameter, parameter_type)
        except AssertionError:
            raise ValueError(
                f"Parameter {parameter_name} is not of type {parameter_type}")
        except KeyError:
            raise ValueError(f"Required parameter {parameter_name} not found")

        worker_params = orchestrator_params["worker_params"]
        for worker_param in worker_params:
            Worker.validate_dict_params(worker_param)

    def start(self) -> None:
        threading.Thread(target=self._orchestrate).start()

    def _update_kill_status(self) -> None:
        """Checks whether all jobs are either succeeded or failed.
        Returns
        -------
        None
        """
        for status in JobStatus:
            if status in (JobStatus.SUCCEEDED, JobStatus.FAILED):
                continue
            if not self.job_statuses[status].empty():
                self.kill_status = False
                return

        for status in self.thread_statuses.values():
            if status:
                self.kill_status = False
                return

        self.kill_status = True
        logger.info(f"KILL STATUS {self.kill_status}")

    def _update_psql_entry(self) -> None:
        """Updates a PostgreSQL entry with orchestration status. Assumes
        that a table entry has already been created.
        Returns
        -------
        """
        table_entry = dict()

        for job_status, job_queue in self.job_statuses.items():
            table_entry[job_status.value.lower()] = job_queue.qsize()

        logger.info(table_entry)
        logger.info(self.thread_statuses)

        for worker_id, worker in self.worker_dict.items():
            logger.info(
                f"{worker.worker_id} has {worker.curr_available_space}")

        update_table_entry(self.psql_conn, "abyss_status",
                           {"abyss_id": self.abyss_id}, **table_entry)

    def _orchestrate(self) -> None:
        """
        Step 1: Predict sizes of jobs using ML predictors
        Step 2: Batch jobs to worker using Batchers
        Step 3: Begin transferring files one at a time to each worker using
        one Prefetcher item per worker.
        Step 4: Constantly poll prefetcher for file completion.
        Step 5: When a file is done, send a funcx job request to crawl on worker
        Step 6: Poll funcx result
        Step 7: Pull result from sqs queue and validate/consolidate
        Returns
        -------
        None
        """
        logger.info("STARTING ORCHESTRATION")
        self._unpredicted_preprocessing_thread.start()
        self._predictor_thread.start()
        self._scheduler_thread.start()
        self._prefetcher_thread.start()
        self._prefetcher_poll_thread.start()
        self._funcx_process_headers_thread.start()
        self._funcx_decompress_thread.start()
        self._funcx_crawl_thread.start()
        self._funcx_poll_thread.start()
        self._consolidate_results_thread.start()

        t0 = time.time()
        while not self.kill_status:
            time.sleep(1)
            self._update_kill_status()
            self._update_psql_entry()
            logger.info(f"ELAPSED: {time.time() - t0}")

        self._unpredicted_preprocessing_thread.join()
        self._predictor_thread.join()
        self._scheduler_thread.join()
        self._prefetcher_thread.join()
        self._prefetcher_poll_thread.join()
        self._funcx_process_headers_thread.join()
        self._funcx_decompress_thread.join()
        self._funcx_crawl_thread.join()
        self._funcx_poll_thread.join()
        self._consolidate_results_thread.join()

        logger.info(f"PUSHING METADATA TO S3")

        # logger.info(metadata)

        metadata_file_path = os.path.join("/tmp", f"{self.abyss_id}.txt")

        with open(metadata_file_path, "w") as f:
            f.writelines("\n".join(
                [json.dumps(metadata) for metadata in self.abyss_metadata]))

        s3_upload_file(self.s3_conn, "xtract-abyss", metadata_file_path,
                       f"{self.abyss_id}.txt")

        os.remove(metadata_file_path)

    def _unpredicted_preprocessing(self) -> None:
        """Determines whether to use machine learning or file headers
        for decompressed size prediction and places jobs into respective
        queues.

        Returns
        -------
        None
        """
        while not self.kill_status:
            unpredicted_queue = self.job_statuses[JobStatus.UNPREDICTED]
            unpredicted_predict_queue = self.job_statuses[
                JobStatus.UNPREDICTED_PREDICT]
            unpredicted_schedule_queue = self.job_statuses[
                JobStatus.UNPREDICTED_SCHEDULE]

            while not unpredicted_queue.empty():
                self.thread_statuses["unpredicted_preprocessing_thread"] = True
                job = unpredicted_queue.get()

                # If a file is recursively compressed we will use machine learning to predict the file size.
                # We only use file headers if the compressed file is directly stored on our storage source.
                if self.prediction_mode == "ml" or job.status != JobStatus.UNPREDICTED:
                    if job.status == JobStatus.UNPREDICTED:
                        job.status = JobStatus.UNPREDICTED_PREDICT
                    unpredicted_predict_queue.put(job)
                    logger.info(
                        f"PLACING {job.file_path} IN UNPREDICTED PREDICT")
                elif self.prediction_mode == "header":
                    if job.file_path.endswith(
                            ".zip") or job.file_path.endswith(".tar"):
                        job.status = JobStatus.UNPREDICTED_SCHEDULE
                        unpredicted_schedule_queue.put(job)
                        logger.info(
                            f"PLACING {job.file_path} IN UNPREDICTED SCHEDULE")
                    else:
                        unpredicted_predict_queue.put(job)
                        logger.info(
                            f"PLACING {job.file_path} IN UNPREDICTED PREDICT")
                else:
                    self.kill_status = True
                    raise ValueError(
                        f"Unknown prediction mode \"{self.prediction_mode}\"")

                self.thread_statuses[
                    "unpredicted_preprocessing_thread"] = False

    def _predict_decompressed_size(self) -> None:
        """Runs decompression size predictions on all files in
        self.compressed_files and then places them in
        self.predicted_files.

        Returns
        -------
        None
        """
        while not self.kill_status:
            unpredicted_queue = self.job_statuses[
                JobStatus.UNPREDICTED_PREDICT]
            predicted_queue = self.job_statuses[JobStatus.PREDICTED]

            while not unpredicted_queue.empty():
                self.thread_statuses["predictor_thread"] = True
                job = unpredicted_queue.get()

                for job_node in job.bfs_iterator(include_root=True):
                    if job_node.status in [
                            JobStatus.UNPREDICTED,
                            JobStatus.UNPREDICTED_PREDICT
                    ]:
                        file_path = job_node.file_path
                        file_extension = Predictor.get_extension(file_path)

                        predictor = self.predictors[file_extension]

                        if job_node.decompressed_size:
                            decompressed_size = predictor.repredict(
                                job_node.decompressed_size)
                            logger.info(
                                f"REPREDICTED {job.file_path} WITH DECOMPRESSED SIZE {decompressed_size}"
                            )
                        else:
                            compressed_size = job_node.compressed_size
                            decompressed_size = predictor.predict(
                                file_path, compressed_size)
                            logger.info(
                                f"PREDICTED {job.file_path} WITH DECOMPRESSED SIZE {decompressed_size}"
                            )

                        with self._lock:
                            job_node.decompressed_size = decompressed_size
                            job_node.status = JobStatus.PREDICTED

                logger.info(
                    f"LATENCY PLACING {job.file_id} INTO PREDICTED AT {time.time()}"
                )
                predicted_queue.put(job)

            self.thread_statuses["predictor_thread"] = False

    def _thread_schedule_jobs(self) -> None:
        """Schedules items from self.predicted_files into
        worker queues in self.worker_queues.
        Returns
        -------
        None
        """
        while not self.kill_status:
            predicted_queue = self.job_statuses[JobStatus.PREDICTED]
            unpredicted_schedule_queue = self.job_statuses[
                JobStatus.UNPREDICTED_SCHEDULE]
            unpredicted_scheduled_queue = self.job_statuses[
                JobStatus.UNPREDICTED_SCHEDULED]
            scheduled_queue = self.job_statuses[JobStatus.SCHEDULED]
            failed_queue = self.job_statuses[JobStatus.FAILED]

            with self._lock:
                predicted_list = []
                while not predicted_queue.empty():
                    self.thread_statuses["scheduler_thread"] = True
                    job = predicted_queue.get()
                    logger.info(f"{job.file_path} SCHEDULING")
                    job.calculate_total_size()
                    predicted_list.append(job)

                while not unpredicted_schedule_queue.empty():
                    self.thread_statuses["scheduler_thread"] = True
                    job = unpredicted_schedule_queue.get()
                    logger.info(f"{job.file_path} UNPREDICTED SCHEDULING")
                    job.calculate_total_size()
                    predicted_list.append(job)

                self.scheduler.schedule_jobs(predicted_list)

                self.worker_queues = self.scheduler.worker_queues
                failed_jobs = self.scheduler.failed_jobs

                for job in predicted_list:
                    queue = None  # reset per job so a prior job's queue is not reused
                    for job_node in job.bfs_iterator(include_root=True):
                        if job_node in failed_jobs:
                            job_node.status = JobStatus.FAILED
                            job_node.error = "Could not schedule"
                            logger.info(f"FAILED TO SCHEDULE {job.file_path}")
                        elif job_node.status == JobStatus.PREDICTED:
                            job_node.status = JobStatus.SCHEDULED
                            queue = JobStatus.SCHEDULED
                        elif job_node.status == JobStatus.UNPREDICTED_SCHEDULE:
                            job_node.status = JobStatus.UNPREDICTED_SCHEDULED
                            queue = JobStatus.UNPREDICTED_SCHEDULED

                    if queue:
                        if queue == JobStatus.SCHEDULED:
                            logger.info(
                                f"LATENCY PLACING {job.file_id} INTO SCHEDULED AT {time.time()}"
                            )
                            scheduled_queue.put(job)
                            logger.info(f"{job.file_path} SCHEDULED")
                        elif queue == JobStatus.UNPREDICTED_SCHEDULED:
                            unpredicted_scheduled_queue.put(job)
                            logger.info(
                                f"{job.file_path} UNPREDICTED SCHEDULED")
                    else:
                        logger.info(
                            f"LATENCY PLACING {job.file_id} INTO FAILED AT {time.time()}"
                        )
                        logger.info(f"{job.file_path} PLACED INTO FAILED")
                        failed_queue.put(job)
                self.thread_statuses["scheduler_thread"] = False

    def _thread_prefetch(self) -> None:
        """Places jobs into queue for prefetcher to transfer.
        Returns
        -------
        None
        """
        while not self.kill_status:
            scheduled_queue = self.job_statuses[JobStatus.SCHEDULED]
            unpredicted_scheduled_queue = self.job_statuses[
                JobStatus.UNPREDICTED_SCHEDULED]
            prefetching_queue = self.job_statuses[JobStatus.PREFETCHING]
            unpredicted_prefetching_queue = self.job_statuses[
                JobStatus.UNPREDICTED_PREFETCHING]

            with self._lock:
                for worker_id, worker_queue in self.worker_queues.items():
                    prefetcher = self.prefetchers[worker_id]
                    jobs_to_prefetch = []

                    while len(worker_queue):
                        self.thread_statuses["prefetcher_thread"] = True
                        job = worker_queue.popleft()
                        logger.info(f"{job.file_path} PREFETCHING")

                        worker_id = job.worker_id

                        jobs_to_prefetch.append(job)
                        job.transfer_path = f"{self.worker_dict[worker_id].transfer_dir}/{job.file_id}"

                        for job_node in job.bfs_iterator(include_root=True):
                            if job_node.status == JobStatus.SCHEDULED:
                                job_node.status = JobStatus.PREFETCHING
                            elif job_node.status == JobStatus.UNPREDICTED_SCHEDULED:
                                job_node.status = JobStatus.UNPREDICTED_PREFETCHING

                        if job.status == JobStatus.UNPREDICTED_PREFETCHING:
                            unpredicted_prefetching_queue.put(job)
                            unpredicted_scheduled_queue.get()
                            logger.info(
                                f"{job.file_path} PLACED INTO UNPREDICTED PREFETCHING"
                            )
                        else:
                            prefetching_queue.put(job)
                            scheduled_queue.get()
                            logger.info(
                                f"{job.file_path} PLACED INTO PREFETCHING")

                    prefetcher.transfer_job_batch(jobs_to_prefetch)

                    for job in jobs_to_prefetch:
                        logger.info(
                            f"LATENCY PLACING {job.file_id} INTO PREFETCHING AT {time.time()}"
                        )

            self.thread_statuses["prefetcher_thread"] = False
            time.sleep(4)

    def _thread_poll_prefetch(self) -> None:
        """Thread function to poll prefetcher and update
        self.job_statuses.
        Returns
        -------
        None
        """
        while not self.kill_status:
            prefetching_queue = self.job_statuses[JobStatus.PREFETCHING]
            unpredicted_prefetching_queue = self.job_statuses[
                JobStatus.UNPREDICTED_PREFETCHING]
            unpredicted_prefetched_queue = self.job_statuses[
                JobStatus.UNPREDICTED_PREFETCHED]
            prefetched_queue = self.job_statuses[JobStatus.PREFETCHED]
            failed_queue = self.job_statuses[JobStatus.FAILED]

            for _ in range(prefetching_queue.qsize() +
                           unpredicted_prefetching_queue.qsize()):
                self.thread_statuses["prefetcher_poll_thread"] = True

                if prefetching_queue.empty():
                    job = unpredicted_prefetching_queue.get()
                else:
                    job = prefetching_queue.get()

                logger.info(f"{job.file_path} POLL PREFETCH")
                file_path = job.file_path
                worker_id = job.worker_id
                prefetcher = self.prefetchers[worker_id]

                prefetcher_status = prefetcher.get_transfer_status(file_path)
                if prefetcher_status == PrefetcherStatuses.SUCCEEDED:
                    for job_node in job.bfs_iterator(include_root=True):
                        if job_node.status == JobStatus.PREFETCHING:
                            job_node.status = JobStatus.PREFETCHED
                        elif job_node.status == JobStatus.UNPREDICTED_PREFETCHING:
                            job_node.status = JobStatus.UNPREDICTED_PREFETCHED

                    if job.status == JobStatus.UNPREDICTED_PREFETCHED:
                        unpredicted_prefetched_queue.put(job)
                        logger.info(
                            f"{job.file_path} PLACED INTO UNPREDICTED PREFETCHED"
                        )
                    else:
                        prefetched_queue.put(job)
                        logger.info(
                            f"LATENCY PLACING {job.file_id} INTO PREFETCHED AT {time.time()}"
                        )
                        logger.info(f"{job.file_path} PLACED INTO PREFETCHED")
                elif prefetcher_status == PrefetcherStatuses.FAILED:
                    for job_node in job.bfs_iterator(include_root=True):
                        if job_node.status in (JobStatus.PREFETCHING,
                                               JobStatus.UNPREDICTED_PREFETCHING):
                            job_node.status = JobStatus.FAILED
                    logger.info(f"{job.file_path} FAILED TO PREFETCH")
                    # Potentially add more logic here or in prefetcher to restart failed transfer
                    failed_queue.put(job)
                else:
                    if job.status == JobStatus.UNPREDICTED_PREFETCHING:
                        unpredicted_prefetching_queue.put(job)
                    else:
                        prefetching_queue.put(job)

            self.thread_statuses["prefetcher_poll_thread"] = False
            time.sleep(5)

    def _thread_funcx_process_headers(self) -> None:
        """Thread function to submit header processing tasks to funcX.

        Returns
        -------
        None
        """
        while not self.kill_status:
            unpredicted_prefetched_queue = self.job_statuses[
                JobStatus.UNPREDICTED_PREFETCHED]
            processing_headers_queue = self.job_statuses[
                JobStatus.PROCESSING_HEADERS]

            batch = self.funcx_client.create_batch()
            batched_jobs = []
            while not unpredicted_prefetched_queue.empty():
                self.thread_statuses["funcx_processing_headers_thread"] = True
                job = unpredicted_prefetched_queue.get()
                logger.info(f"{job.file_path} PROCESSING HEADERS")
                job_dict = Job.to_dict(job)
                worker_id = job.worker_id

                worker = self.worker_dict[worker_id]
                batch.add(job_dict,
                          endpoint_id=worker.funcx_eid,
                          function_id=PROCESS_HEADER_FUNCX_UUID)
                batched_jobs.append(job)

            if len(batch.tasks) > 0:
                batch_res = self.funcx_client.batch_run(batch)
            else:
                batch_res = None

            for idx, job in enumerate(batched_jobs):
                job.funcx_process_headers_id = batch_res[idx]
                job.status = JobStatus.PROCESSING_HEADERS

                processing_headers_queue.put(job)
                logger.info(f"{job.file_path} PROCESSING HEADERS QUEUE")

            time.sleep(5)

            self.thread_statuses["funcx_processing_headers_thread"] = False

    # TODO: Consolidate this and _thread_funcx_crawl into one function
    def _thread_funcx_decompress(self) -> None:
        """Thread function to submit decompression tasks to funcX.

        Returns
        -------
        None
        """
        while not self.kill_status:
            prefetched_queue = self.job_statuses[JobStatus.PREFETCHED]
            decompressing_queue = self.job_statuses[JobStatus.DECOMPRESSING]

            batch = self.funcx_client.create_batch()
            batched_jobs = []
            while not prefetched_queue.empty():
                self.thread_statuses["funcx_decompress_thread"] = True
                job = prefetched_queue.get()
                job_dict = Job.to_dict(job)
                worker_id = job.worker_id

                worker = self.worker_dict[worker_id]
                batch.add(job_dict,
                          worker.decompress_dir,
                          endpoint_id=worker.funcx_eid,
                          function_id=DECOMPRESSOR_FUNCX_UUID)
                batched_jobs.append(job)

            if len(batch.tasks) > 0:
                batch_res = self.funcx_client.batch_run(batch)
            else:
                batch_res = None

            for idx, job in enumerate(batched_jobs):
                logger.info(f"{job.file_path} DECOMPRESSING")
                for job_node in job.bfs_iterator(include_root=True):
                    job_node.funcx_decompress_id = batch_res[idx]
                    if job_node.status == JobStatus.PREFETCHED:
                        job_node.status = JobStatus.DECOMPRESSING

                decompressing_queue.put(job)
                logger.info(
                    f"LATENCY PLACING {job.file_id} INTO DECOMPRESSING AT {time.time()}"
                )

            time.sleep(5)

            self.thread_statuses["funcx_decompress_thread"] = False

    def _thread_funcx_crawl(self) -> None:
        """Thread function to submit crawl tasks to funcX.
        Returns
        -------
        None
        """
        while not self.kill_status:
            decompressed_queue = self.job_statuses[JobStatus.DECOMPRESSED]
            crawling_queue = self.job_statuses[JobStatus.CRAWLING]

            batch = self.funcx_client.create_batch()
            batched_jobs = []
            while not decompressed_queue.empty():
                self.thread_statuses["funcx_crawl_thread"] = True
                job = decompressed_queue.get()
                logger.info(f"{job.file_path} CRAWLING")
                job_dict = Job.to_dict(job)
                worker_id = job.worker_id

                worker = self.worker_dict[worker_id]
                batch.add(job_dict,
                          "",
                          endpoint_id=worker.funcx_eid,
                          function_id=LOCAL_CRAWLER_FUNCX_UUID)
                batched_jobs.append(job)

            if len(batch.tasks) > 0:
                batch_res = self.funcx_client.batch_run(batch)
            else:
                batch_res = None

            for idx, job in enumerate(batched_jobs):
                logger.info(
                    f"LATENCY PLACING {job.file_id} INTO CRAWLING AT {time.time()}"
                )
                for job_node in job.bfs_iterator(include_root=True):
                    job_node.funcx_crawl_id = batch_res[idx]
                    if job_node.status == JobStatus.DECOMPRESSED:
                        job_node.status = JobStatus.CRAWLING

                crawling_queue.put(job)

            time.sleep(5)

            self.thread_statuses["funcx_crawl_thread"] = False

    def _thread_funcx_poll(self) -> None:
        """Thread function to poll funcX for results.

        Returns
        -------
        None
        """
        unpredicted_queue = self.job_statuses[JobStatus.UNPREDICTED]
        decompressing_queue = self.job_statuses[JobStatus.DECOMPRESSING]
        decompressed_queue = self.job_statuses[JobStatus.DECOMPRESSED]
        crawling_queue = self.job_statuses[JobStatus.CRAWLING]
        processing_headers_queue = self.job_statuses[
            JobStatus.PROCESSING_HEADERS]
        predicted_queue = self.job_statuses[JobStatus.PREDICTED]
        consolidating_queue = self.job_statuses[JobStatus.CONSOLIDATING]
        failed_queue = self.job_statuses[JobStatus.FAILED]

        while not self.kill_status:
            processing_headers_funcx_ids = []
            processing_header_jobs = []
            while not processing_headers_queue.empty():
                self.thread_statuses["funcx_poll_thread"] = True
                job = processing_headers_queue.get()
                logger.info(f"{job.file_path} POLLING HEADER PROCESSING")
                processing_headers_funcx_ids.append(
                    job.funcx_process_headers_id)
                processing_header_jobs.append(job)

            processing_headers_statuses = self.funcx_client.get_batch_status(
                task_id_list=processing_headers_funcx_ids)
            for job in processing_header_jobs:
                worker = self.worker_dict[job.worker_id]
                job_status = processing_headers_statuses[
                    job.funcx_process_headers_id]

                if job_status["pending"]:
                    processing_headers_queue.put(job)
                elif job_status["status"] == "success":
                    logger.info(f"{job.file_path} COMPLETED HEADER PROCESSING")
                    job = Job.from_dict(job_status["result"])
                    job.status = JobStatus.PREDICTED

                    worker.curr_available_space += job.compressed_size
                    predicted_queue.put(job)
                elif job_status["status"] == "failed":
                    worker.curr_available_space += job.compressed_size
                    unpredicted_predict_queue = self.job_statuses[
                        JobStatus.UNPREDICTED_PREDICT]
                    job.status = JobStatus.UNPREDICTED_PREDICT
                    unpredicted_predict_queue.put(job)

            time.sleep(5)

            decompressing_funcx_ids = []
            decompressing_jobs = []
            while not decompressing_queue.empty():
                self.thread_statuses["funcx_poll_thread"] = True
                job = decompressing_queue.get()
                logger.info(f"{job.file_path} POLLING DECOMPRESS")
                decompressing_funcx_ids.append(job.funcx_decompress_id)
                decompressing_jobs.append(job)

            decompressing_statuses = self.funcx_client.get_batch_status(
                decompressing_funcx_ids)
            for job in decompressing_jobs:
                worker = self.worker_dict[job.worker_id]
                job_status = decompressing_statuses[job.funcx_decompress_id]
                logger.info(job_status)

                if job_status["pending"]:
                    decompressing_queue.put(job)
                elif job_status["status"] == "success":
                    job = Job.from_dict(job_status["result"])
                    logger.info(f"{job.file_path} COMPLETED DECOMPRESS")

                    if job.status == JobStatus.FAILED:
                        worker.curr_available_space += job.total_size
                        failed_queue.put(job)
                        logger.info(f"{job.file_path} PLACED INTO FAILED")
                        logger.info(
                            f"LATENCY PLACING {job.file_id} INTO FAILED AT {time.time()}"
                        )
                        continue

                    has_unpredicted = False
                    for job_node in job.bfs_iterator(include_root=True):
                        if job_node.status == JobStatus.DECOMPRESSING:
                            job_node.status = JobStatus.DECOMPRESSED
                        elif job_node.status == JobStatus.UNPREDICTED:
                            has_unpredicted = True

                    if has_unpredicted:
                        unpredicted_queue.put(job)
                        logger.info(
                            f"LATENCY PLACING {job.file_id} INTO UNPREDICTED AT {time.time()}"
                        )
                        logger.info(f"{job.file_path} PLACED INTO UNPREDICTED")

                    worker.curr_available_space += job.compressed_size

                    decompressed_queue.put(job)
                    logger.info(
                        f"LATENCY PLACING {job.file_id} INTO DECOMPRESSED AT {time.time()}"
                    )
                    logger.info(f"{job.file_path} PLACED INTO DECOMPRESSED")
                elif job_status["status"] == "failed":
                    worker.curr_available_space += job.compressed_size
                    logger.info(
                        f"ERROR for {job.file_path}: {job_status['exception']}"
                    )
                    logger.info(f"{job.file_path} PLACED INTO FAILED")
                    failed_queue.put(job)
                    logger.info(
                        f"LATENCY PLACING {job.file_id} INTO FAILED AT {time.time()}"
                    )

            time.sleep(5)

            crawling_funcx_ids = []
            crawling_jobs = []
            while not crawling_queue.empty():
                self.thread_statuses["funcx_poll_thread"] = True
                job = crawling_queue.get()
                logger.info(f"{job.file_path} POLLING CRAWL")
                crawling_funcx_ids.append(job.funcx_crawl_id)
                crawling_jobs.append(job)

            crawling_statuses = self.funcx_client.get_batch_status(
                crawling_funcx_ids)
            for job in crawling_jobs:
                worker = self.worker_dict[job.worker_id]
                job_status = crawling_statuses[job.funcx_crawl_id]

                if job_status["pending"]:
                    crawling_queue.put(job)
                elif job_status["status"] == "success":
                    result = job_status["result"]
                    job = Job.from_dict(result)
                    logger.info(f"{job.file_path} COMPLETED CRAWL")

                    for job_node in job.bfs_iterator(include_root=True):
                        if job_node.status == JobStatus.CRAWLING:
                            job_node.status = JobStatus.CONSOLIDATING

                    worker.curr_available_space += (job.total_size -
                                                    job.compressed_size)
                    consolidating_queue.put(job)
                    logger.info(
                        f"LATENCY PLACING {job.file_id} INTO CONSOLIDATING AT {time.time()}"
                    )
                    logger.info(f"{job.file_path} PLACED INTO CONSOLIDATING")
                elif job_status["status"] == "failed":
                    worker.curr_available_space += (job.total_size -
                                                    job.compressed_size)
                    failed_queue.put(job)
                    logger.info(f"{job.file_path} PLACED INTO FAILED")
                    logger.info(
                        f"LATENCY PLACING {job.file_id} INTO FAILED AT {time.time()}"
                    )

            time.sleep(5)

            self.thread_statuses["funcx_poll_thread"] = False

    def _thread_consolidate_crawl_results(self) -> None:
        """Thread function to consolidate crawl results and push to SQS.
        Returns
        -------
        None
        """
        while not self.kill_status:
            unpredicted_queue = self.job_statuses[JobStatus.UNPREDICTED]
            consolidating_queue = self.job_statuses[JobStatus.CONSOLIDATING]
            succeeded_queue = self.job_statuses[JobStatus.SUCCEEDED]
            failed_queue = self.job_statuses[JobStatus.FAILED]

            while not consolidating_queue.empty():
                self.thread_statuses["consolidate_results_thread"] = True
                job = consolidating_queue.get()
                logger.info(f"{job.file_path} CONSOLIDATING")

                resubmit_task = False
                for job_node in job.bfs_iterator(include_root=True):
                    root_path = job_node.metadata["root_path"]
                    for file_path, file_metadata in job_node.metadata[
                            "metadata"].items():
                        file_size = file_metadata["physical"]["size"]
                        is_compressed = file_metadata["physical"][
                            "is_compressed"]

                        child_file_path = os.path.join(root_path, file_path)

                        if is_compressed:
                            if "decompressed_size" in file_metadata[
                                    "physical"]:
                                decompressed_size = file_metadata["physical"][
                                    "decompressed_size"]
                            else:
                                decompressed_size = None
                            if child_file_path in job_node.child_jobs:
                                # A child job already exists for this file;
                                # skip it but keep scanning the remaining
                                # metadata entries.
                                continue
                            else:
                                child_job = Job(file_path=child_file_path,
                                                file_id=f"{str(uuid.uuid4())}",
                                                compressed_size=file_size)

                                if decompressed_size:
                                    child_job.decompressed_size = decompressed_size
                                    child_job.status = JobStatus.PREDICTED
                                else:
                                    child_job.status = JobStatus.UNPREDICTED

                                job_node.child_jobs[
                                    child_file_path] = child_job
                                resubmit_task = True

                if resubmit_task:
                    logger.info(f"{job.file_path} RESUBMITTING")
                    unpredicted_queue.put(job)
                    logger.info(
                        f"LATENCY PLACING {job.file_id} INTO UNPREDICTED AT {time.time()}"
                    )
                    continue

                consolidated_metadata = job.consolidate_metadata()
                self.abyss_metadata.append(consolidated_metadata)

                for job_node in job.bfs_iterator(include_root=True):
                    if job_node.status == JobStatus.CONSOLIDATING:
                        job_node.status = JobStatus.SUCCEEDED

                succeeded_queue.put(job)
                logger.info(f"{job.file_path} PLACED INTO SUCCEEDED")
                logger.info(
                    f"LATENCY PLACING {job.file_id} INTO SUCCEEDED AT {time.time()}"
                )

            while not failed_queue.empty():
                job = failed_queue.get()
                logger.info(f"{job.file_path} CONSOLIDATING FROM FAILED")
                consolidated_metadata = job.consolidate_metadata()
                self.abyss_metadata.append(consolidated_metadata)
                succeeded_queue.put(job)
                logger.info(
                    f"LATENCY PLACING {job.file_id} INTO SUCCEEDED AT {time.time()}"
                )

            self.thread_statuses["consolidate_results_thread"] = False
Example #24
from funcx import FuncXClient

# Likely a pytest fixture in the original suite (decorator not shown).
def async_fxc(fxc_args):
    fxc = FuncXClient(**fxc_args, asynchronous=True)
    return fxc
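
A hedged usage sketch for the asynchronous client above, assuming that run returns an awaitable future in this mode (an assumption, not shown in this snippet); run_one and its arguments are illustrative:

# Hedged sketch: submit one task through an asynchronous FuncXClient and
# await its result. ep_id and fn_id are placeholder UUIDs.
async def run_one(fxc, ep_id, fn_id, *args):
    future = fxc.run(*args, endpoint_id=ep_id, function_id=fn_id)
    return await future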
Example #25
from funcx import FuncXClient

# Likely a pytest fixture in the original suite (decorator not shown).
def fxc(funcx_test_config):
    client_args = funcx_test_config["client_args"]
    fxc = FuncXClient(**client_args)
    fxc.throttling_enabled = False
    return fxc
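
Both helpers read like pytest fixtures, so a test would receive them by argument name. Here is a hedged sketch of consuming the fxc fixture; endpoint_uuid is a hypothetical fixture supplying a live endpoint's UUID, not part of the original suite:

def test_round_trip(fxc, endpoint_uuid):
    # Hedged sketch: register a trivial function and submit it once.
    def double(x):
        return x * 2

    fn_id = fxc.register_function(double)
    task_id = fxc.run(21, endpoint_id=endpoint_uuid, function_id=fn_id)
    assert task_id is not None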
Example #26
import argparse
import time

import funcx
from funcx import FuncXClient


def test(fxc, ep_id, task_count=10):
    # NOTE: the original listing is truncated here. The setup below is a
    # plausible reconstruction (register a trivial function, then submit
    # task_count copies of it as a batch); only the submission kwargs
    # survive from the original.
    def double(x):
        return x * 2

    fn_uuid = fxc.register_function(double)

    start = time.time()
    batch = fxc.create_batch()
    for _ in range(task_count):
        batch.add(2, endpoint_id=ep_id, function_id=fn_uuid)
    task_ids = fxc.batch_run(batch)
    delta = time.time() - start
    print("Time to launch {} tasks: {:8.3f} s".format(task_count, delta))
    print("Got {} tasks_ids ".format(len(task_ids)))

    for _ in range(3):
        x = fxc.get_batch_status(task_ids)
        # A task is complete once it stops reporting pending.
        complete_count = sum(
            [1 for t in task_ids if t in x and not x[t].get('pending', True)])
        print("Batch status : {}/{} complete".format(complete_count,
                                                     len(task_ids)))
        if complete_count == len(task_ids):
            break
        time.sleep(2)

    delta = time.time() - start
    print("Time to complete {} tasks: {:8.3f} s".format(task_count, delta))
    print("Throughput : {:8.3f} Tasks/s".format(task_count / delta))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--endpoint", required=True)
    parser.add_argument("-c", "--count", default="10")
    args = parser.parse_args()

    print("FuncX version : ", funcx.__version__)
    fxc = FuncXClient(funcx_service_address='https://dev.funcx.org/api/v1')
    test(fxc, args.endpoint, task_count=int(args.count))
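
The fixed three-iteration poll above can fall through with tasks still pending on a slow endpoint; here is a hedged variant with a deadline (wait_for_batch and its defaults are illustrative, not part of the funcx SDK):

import time

def wait_for_batch(fxc, task_ids, timeout=60.0, interval=2.0):
    # Hedged sketch: poll get_batch_status until every task stops
    # reporting pending, or raise once the deadline passes.
    deadline = time.time() + timeout
    while time.time() < deadline:
        status = fxc.get_batch_status(task_ids)
        if all(t in status and not status[t].get('pending', True)
               for t in task_ids):
            return status
        time.sleep(interval)
    raise TimeoutError(f"batch incomplete after {timeout} s")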
Example #27
# NOTE: the original listing is truncated here. The lines below are a
# plausible reconstruction of the token acquisition with
# fair_research_login; the client ID is a placeholder, not from the
# original. remote_extract_batch (used further down) is a project-local
# helper whose import is also not shown.
from fair_research_login import NativeClient
from funcx import FuncXClient
from funcx.serialize import FuncXSerializer

tokens = NativeClient(client_id="<NATIVE_APP_CLIENT_ID>").login(
    no_local_server=True,
    no_browser=True)

auth_token = tokens["petrel_https_server"]['access_token']
transfer_token = tokens['transfer.api.globus.org']['access_token']
funcx_token = tokens['funcx_service']['access_token']

headers = {'Authorization': f"Bearer {funcx_token}", 'Transfer': transfer_token, 'FuncX': funcx_token, 'Petrel': auth_token}
print(f"Headers: {headers}")


def hello_world(event):
    return "Hello World!"


fxc = FuncXClient()

func_uuid = fxc.register_function(hello_world)
print(func_uuid)
event = None

endpoint = '68bade94-bf58-4a7a-bfeb-9c6a61fa5443'


items_to_batch = [{"func_id": func_uuid, "event": {}}, {"func_id": func_uuid, "event": {}}]
x = remote_extract_batch(items_to_batch, endpoint, headers=headers)

fx_ser = FuncXSerializer()

import time
while True:
    # (Truncated in the original listing; the loop presumably polled the
    # submitted batch tasks and deserialized their results with fx_ser.)
    break
Example #28
import time

from funcx import FuncXClient

# NOTE: base_extractor and tabular_event are project-local names whose
# definitions are not shown in this listing.


def test(event):
    import os
    return os.environ['container_version']


def main(fxc, ep_id):
    container_uuid = fxc.register_container('/home/tskluzac/ext_repos/xtract-keyword/xtract-keyword.img', 'singularity')
    print("Container UUID: {}".format(container_uuid))
    fn_uuid = fxc.register_function(base_extractor,
                                    #ep_id, # TODO: We do not need ep id here
                                    container_uuid=container_uuid,
                                    description="Tabular test function.")
    print("FN_UUID : ", fn_uuid)
    res = fxc.run(tabular_event,
                  endpoint_id=ep_id, function_id=fn_uuid)
    print(res)
    for i in range(100):
        try:
            x = fxc.get_result(res)
            print(x)
            break
        except Exception as e:
            print("Exception: {}".format(e))
            time.sleep(2)


if __name__ == "__main__":
    fxc = FuncXClient()
    main(fxc, "e1398319-0d0f-4188-909b-a978f6fc5621")
Example #29
from funcx import FuncXClient
import time
from queue import Queue
from extractors.xtract_matio import matio_extract

fxc = FuncXClient()

# id_list = ['63525bf3-b894-4571-9976-fd675932db46']
# id_list = ['dbc7a749-f689-419c-b114-2f9eb8146496']
id_list = ['c8f24648-6b96-4d58-ac14-93ccf81da12c']

def sleep_func(file_ls):
    import time
    #
    # # for item in file_ls:
    # #     with open(item, 'r') as f:
    # #         f.close()
    #
    # time.sleep(sleep_s)
    return "hello, world!"


# func_id = fxc.register_function(function=sleep_func, function_name='hpdc_sleep_extractor')
container_uuid = fxc.register_container('/home/tskluzac/xtract-matio.img', 'singularity')
print("Container UUID: {}".format(container_uuid))
func_id = fxc.register_function(matio_extract,
                                #ep_id, # TODO: We do not need ep id here
                                container_uuid=container_uuid,
                                description="New sum function defined without")