Example #1
def main():
    args = get_args()
    client = Client('127.0.0.1:8786')
    ncores = sum(client.ncores().values())
    pd.set_option('display.large_repr', 'truncate')
    pd.set_option('display.max_columns', 0)  # noqa
    pd.set_option('display.max_rows', 1000)  # noqa
    cann_group_df = make_cann_group_df(num_products=100)
    df = read_df(args, cann_group_df['productKey'])
    logger.info('Setting index')
    df = df.set_index('customerKey', drop=True)
    logger.info('Repartitioning')
    df = df.repartition(npartitions=ncores)
    logger.info('Mapping Cann Group')
    df['cannGroupKey'] = df['productKey'].map(cann_group_df['cannGroupKey'])
    logger.info('Persisting')
    df = client.persist(df)
    logger.info('Cann Groups')
    for cann_group_key in cann_group_df['cannGroupKey'].unique().tolist():
        print('Filtering Cann Group %s' % cann_group_key)
        cann_df = df[df['cannGroupKey'] == cann_group_key]
        print('This df: %s' % (len(cann_df), ))
        with Timer('%s' % (cann_group_key, )):
            calculate_switching(cann_df)
        return
Example #2
def init_client():
    import argparse
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-scheduler', required=False)
    parser.add_argument('-expect-workers', type=int, default=0)
    args, unknown = parser.parse_known_args()

    if args.scheduler:
        from dask.distributed import Client
        client = Client(args.scheduler)
        if args.expect_workers > 0:
            while True:
                num_workers = len(client.ncores())
                if num_workers >= args.expect_workers:
                    break
                print(
                    'Client waiting for workers (have %s expect %s)' %
                    (num_workers, args.expect_workers),
                    flush=True)
                import time
                time.sleep(5)
    else:
        client = None

    return client
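Recent versions of dask.distributed also expose Client.wait_for_workers, which can replace the polling loop above; a minimal sketch, assuming that method is available in the installed version:

def init_client_wait(scheduler, expect_workers=0):
    from dask.distributed import Client
    client = Client(scheduler)
    if expect_workers > 0:
        # Block until at least expect_workers workers have registered with the scheduler.
        client.wait_for_workers(n_workers=expect_workers)
    return client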
Example #3
def get_total_cores(cluster: SpecCluster, client: Client) -> int:
    """
    Retrieve the total number of cores from a Dask cluster object.
    """
    # The Client.ncores() method returns the number of cores of each Dask
    # worker that is known to the scheduler
    return sum(client.ncores().values())
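As the docstring and comment note, Client.ncores() returns a mapping from worker address to that worker's core count, so both worker and core totals fall out of the same call; a minimal sketch against a small local cluster (the cluster parameters here are illustrative, not from the original project):

from dask.distributed import Client, LocalCluster

if __name__ == '__main__':
    cluster = LocalCluster(n_workers=2, threads_per_worker=2)
    client = Client(cluster)
    per_worker = client.ncores()  # e.g. {'tcp://127.0.0.1:46329': 2, 'tcp://127.0.0.1:45817': 2}
    print('workers:', len(per_worker))               # number of workers known to the scheduler
    print('total cores:', sum(per_worker.values()))  # total cores across all workers
    client.close()
    cluster.close()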
Example #4
def map_wrapper(function_item, list_items):
    from dask.distributed import Client
    import dask.bag as db
    c = Client()
    NCORES = len(c.ncores().values())
    b0 = db.from_sequence(list_items, npartitions=NCORES)
    list_items = list(db.map(function_item, b0).compute())
    return list_items
Example #5
def pool_broadcast(client: Client, action: Any, *args: List[Any],
                   **kwargs: Dict[str, Any]):
    """Call ``action(*args, **kwargs)`` on every worker thread.

    This function blocks until all tasks are complete. The expectation is
    that it is called at the very beginning on an empty pool; if called
    on a busy pool, it will block until all active tasks are complete.

    Broadcast is achieved by blocking every task until all tasks have started;
    every worker does the following:

    1. Let the primary task know this task has started
    2. Perform action
    3. Wait for all other tasks to start
    4. Finish

    Steps (1) and (3) are achieved using distributed Queues, step (1) is a
    non-blocking ``put`` and step (3) is a blocking ``get``.

    :param client: Dask client object
    :param action: Callable `action(*args, **kwargs)`
    :param args: Ordered arguments to action
    :param kwargs: Named arguments to action

    """
    postfix = "-{:02x}".format(randint(0, 1 << 64))
    total_worker_threads = sum(client.ncores().values())
    q1 = Queue("q1" + postfix, client=client, maxsize=total_worker_threads)
    q2 = Queue("q2" + postfix, client=client, maxsize=total_worker_threads)

    ff = [
        client.submit(
            _bcast_action,
            q1,
            q2,
            i,
            action,
            args,
            kwargs,
            key="broadcast_action_{:04d}{}".format(i, postfix),
        ) for i in range(total_worker_threads)
    ]

    tks = set()
    for _ in range(total_worker_threads):
        tks.add(q1.get())  # blocking

    assert len(tks) == total_worker_threads

    # at this point all workers have launched
    # allow them to continue
    for i in range(total_worker_threads):
        q2.put(i)  # should not block

    # block until all done and return result
    return [f.result() for f in ff]
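The _bcast_action helper submitted above is not shown in this snippet; based on the four steps described in the docstring, it would roughly take the following shape (a hypothetical sketch, not the original implementation):

def _bcast_action(q1, q2, idx, action, args, kwargs):
    # Step 1: let the primary task know this task has started (non-blocking put).
    q1.put(idx)
    # Step 2: perform the action on this worker thread.
    result = action(*args, **kwargs)
    # Step 3: wait for all other tasks to start (blocking get).
    q2.get()
    # Step 4: finish, returning the result to the caller.
    return result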
def map_wrapper(function_item,list_items,other_args=None):
    from dask.distributed import Client
    import dask.bag as db
    c = Client()
    NCORES = len(c.ncores().values())-2
    b0 = db.from_sequence(list_items, npartitions=NCORES)
    if other_args is not None:
        list_items = list(db.map(function_item,b0,other_args).compute())
    else:
        list_items = list(db.map(function_item,b0).compute())
    return list_items
def run_dask_compute(h5_main):
    raw_data = h5_main[()]
    #cpu_cores = int(cpu_cores/8)
    #dask_raw_data = da.from_array(raw_data, chunks='auto')
    #cluster = LocalCluster(n_workers=cpu_cores/8)
    #client = Client(cluster, processes=True)
    #map = dask_raw_data.map_blocks(find_all_peaks, [20, 60], num_steps=30)
    #results = map.compute()
    client = Client(processes=False)
    dask_raw_data = client.scatter(raw_data)
    args = [[20, 60]]
    kwargs = {'num_steps': 30}
    L = client.submit(find_all_peaks, dask_raw_data, args, kwargs)
    dask_results = client.compute(L)
    cores = client.ncores()
    client.close()
    return cores
def run_dask_compute(h5_main, proc=True):
    raw_data = h5_main[()]
    #cpu_cores = int(cpu_cores/8)
    dask_raw_data = da.from_array(raw_data, chunks='auto')
    #cluster = LocalCluster(n_workers=cpu_cores/8)
    #client = Client(cluster, processes=True)
    #map = dask_raw_data.map_blocks(find_all_peaks, [20, 60], num_steps=30)
    #results = map.compute()
    client = Client(processes=proc)
    L = client.map(find_all_peaks,
                   dask_raw_data,
                   width_bounds=[20, 60],
                   num_steps=30)
    dask_results = client.gather(L)
    cores = client.ncores()
    client.close()
    return dask_results
def main():
    #print('XGBOOST_BUILD_DOC is ' + os.environ['XGBOOST_BUILD_DOC'])
    parser = argparse.ArgumentParser("rapidssample")
    parser.add_argument("--data_dir", type=str, help="location of data")
    parser.add_argument("--num_gpu", type=int, help="Number of GPUs to use", default=1)
    parser.add_argument("--part_count", type=int, help="Number of data files to train against", default=2)
    parser.add_argument("--end_year", type=int, help="Year to end the data load", default=2000)
    parser.add_argument("--cpu_predictor", type=str, help="Flag to use CPU for prediction", default='False')
    parser.add_argument('-f', type=str, default='') # added for notebook execution scenarios
    args = parser.parse_args()
    data_dir = args.data_dir
    num_gpu = args.num_gpu
    part_count = args.part_count
    end_year = args.end_year
    cpu_predictor = args.cpu_predictor.lower() in ('yes', 'true', 't', 'y', '1')

    if cpu_predictor:
        print('Training with CPUs requires num_gpu = 1')
        num_gpu = 1

    print('data_dir = {0}'.format(data_dir))
    print('num_gpu = {0}'.format(num_gpu))
    print('part_count = {0}'.format(part_count))
    #part_count = part_count + 1 # adding one because the usage below is not inclusive
    print('end_year = {0}'.format(end_year))
    print('cpu_predictor = {0}'.format(cpu_predictor))
    
    import subprocess

    cmd = "hostname --all-ip-addresses"
    process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
    output, error = process.communicate()
    IPADDR = str(output.decode()).split()[0]
    
    cluster = LocalCUDACluster(ip=IPADDR,n_workers=num_gpu)
    client = Client(cluster)
    client
    print(client.ncores())

# to download data for this notebook, visit https://rapidsai.github.io/demos/datasets/mortgage-data and update the following paths accordingly
    acq_data_path = "{0}/acq".format(data_dir) #"/rapids/data/mortgage/acq"
    perf_data_path = "{0}/perf".format(data_dir) #"/rapids/data/mortgage/perf"
    col_names_path = "{0}/names.csv".format(data_dir) # "/rapids/data/mortgage/names.csv"
    start_year = 2000
#end_year = 2000 # end_year is inclusive -- converted to parameter
#part_count = 2 # the number of data files to train against -- converted to parameter

    client.run(initialize_rmm_pool)
    client
    print(client.ncores())
# NOTE: The ETL calculates additional features which are then dropped before creating the XGBoost DMatrix.
# This can be optimized to avoid calculating the dropped features.
    print("Reading ...")
    t1 = datetime.datetime.now()
    gpu_dfs = []
    gpu_time = 0
    quarter = 1
    year = start_year
    count = 0
    while year <= end_year:
        for file in glob(os.path.join(perf_data_path + "/Performance_" + str(year) + "Q" + str(quarter) + "*")):
            if count < part_count:
                gpu_dfs.append(process_quarter_gpu(client, col_names_path, acq_data_path, year=year, quarter=quarter, perf_file=file))
                count += 1
                print('file: {0}'.format(file))
                print('count: {0}'.format(count))
        quarter += 1
        if quarter == 5:
            year += 1
            quarter = 1
            
    wait(gpu_dfs)
    t2 = datetime.datetime.now()
    print("Reading time ...")
    print(t2-t1)
    print('len(gpu_dfs) is {0}'.format(len(gpu_dfs)))
    
    client.run(cudf._gdf.rmm_finalize)
    client.run(initialize_rmm_no_pool)
    client
    print(client.ncores())
    dxgb_gpu_params = {
        'nround':            100,
        'max_depth':         8,
        'max_leaves':        2**8,
        'alpha':             0.9,
        'eta':               0.1,
        'gamma':             0.1,
        'learning_rate':     0.1,
        'subsample':         1,
        'reg_lambda':        1,
        'scale_pos_weight':  2,
        'min_child_weight':  30,
        'tree_method':       'gpu_hist',
        'n_gpus':            1, 
        'distributed_dask':  True,
        'loss':              'ls',
        'objective':         'gpu:reg:linear',
        'max_features':      'auto',
        'criterion':         'friedman_mse',
        'grow_policy':       'lossguide',
        'verbose':           True
    }
      
    if cpu_predictor:
        print('Training using CPUs')
        dxgb_gpu_params['predictor'] = 'cpu_predictor'
        dxgb_gpu_params['tree_method'] = 'hist'
        dxgb_gpu_params['objective'] = 'reg:linear'
        
    else:
        print('Training using GPUs')
    
    print('Training parameters are {0}'.format(dxgb_gpu_params))
    
    gpu_dfs = [delayed(DataFrame.from_arrow)(gpu_df) for gpu_df in gpu_dfs[:part_count]]
    gpu_dfs = [gpu_df for gpu_df in gpu_dfs]
    wait(gpu_dfs)
    
    tmp_map = [(gpu_df, list(client.who_has(gpu_df).values())[0]) for gpu_df in gpu_dfs]
    new_map = {}
    for key, value in tmp_map:
        if value not in new_map:
            new_map[value] = [key]
        else:
            new_map[value].append(key)
    
    del(tmp_map)
    gpu_dfs = []
    for list_delayed in new_map.values():
        gpu_dfs.append(delayed(cudf.concat)(list_delayed))
    
    del(new_map)
    gpu_dfs = [(gpu_df[['delinquency_12']], gpu_df[delayed(list)(gpu_df.columns.difference(['delinquency_12']))]) for gpu_df in gpu_dfs]
    gpu_dfs = [(gpu_df[0].persist(), gpu_df[1].persist()) for gpu_df in gpu_dfs]
    
    gpu_dfs = [dask.delayed(xgb.DMatrix)(gpu_df[1], gpu_df[0]) for gpu_df in gpu_dfs]
    gpu_dfs = [gpu_df.persist() for gpu_df in gpu_dfs]
    gc.collect()
    wait(gpu_dfs)
    
    labels = None
    t1 = datetime.datetime.now()
    bst = dxgb_gpu.train(client, dxgb_gpu_params, gpu_dfs, labels, num_boost_round=dxgb_gpu_params['nround'])
    t2 = datetime.datetime.now()
    print("Training time ...")
    print(t2-t1)
    print('str(bst) is {0}'.format(str(bst)))
    print('Exiting script')
Example #10
from dask.distributed import Client, LocalCluster

from jmetal.util.observer import ProgressBarObserver, VisualizerObserver
from jmetal.util.termination_criterion import StoppingByEvaluations
from pymsa.core.score import SumOfPairs, PercentageOfTotallyConservedColumns

from sequoya.algorithm.multiobjective.nsgaii import DistributedNSGAII
from sequoya.operator import SPXMSA, ShiftClosedGapGroups
from sequoya.problem import BAliBASE
from sequoya.util.solution import get_representative_set
from sequoya.util.visualization import MSAPlot

if __name__ == '__main__':
    # setup Dask client (web interface will be initialized at http://127.0.0.1:8787/workers)
    cluster = LocalCluster(n_workers=4, processes=True)
    client = Client(cluster)

    ncores = sum(client.ncores().values())
    print(f'{ncores} cores available')

    # creates the problem
    problem = BAliBASE(
        instance='BB20019',
        path='../resources',
        score_list=[SumOfPairs(),
                    PercentageOfTotallyConservedColumns()])

    # creates the algorithm
    max_evaluations = 200000
    reference_point = [-175000, -1.35]

    algorithm = DistributedNSGAII(
        problem=problem,
Example #11
import dask
import numpy as np

if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('-scheduler', required=False)
    parser.add_argument('-expect-workers', type=int, default=0)
    args, unknown = parser.parse_known_args()

    if args.scheduler:
        from dask.distributed import Client
        client = Client(args.scheduler)
        if args.expect_workers > 0:
            while True:
                num_workers = len(client.ncores())
                if num_workers >= args.expect_workers:
                    break
                print('Client waiting for workers (have %s expect %s)' % (num_workers, args.expect_workers), flush=True)
                import time
                time.sleep(5)

def encode_task_graph(graph):
    from task_bench_core import ffi, c
    return np.frombuffer(ffi.buffer(ffi.addressof(graph), ffi.sizeof(graph)), dtype=np.ubyte)


def decode_task_graph(graph_array):
    from task_bench_core import ffi, c
    return ffi.cast("task_graph_t *", graph_array.ctypes.data)[0]
import s3fs
import dask.dataframe as dd
import dask.distributed
import os
from time import ctime

s3url = os.environ['s3url']
schurl = os.environ['schurl']

print(ctime(), 'running daskclientapp.. hang tight...')

df = dd.read_csv(s3url, storage_options={'anon': True})

from dask.distributed import Client
client = Client(schurl)

print(ctime(), client.ncores())

print(ctime(), df.head())

# modify the below statement based on your s3 dataset

dfg = df.groupby('VendorID').agg({
    'passenger_count': 'count',
    'trip_distance': 'sum'
}).astype(int).reset_index().rename(columns={
    'passenger_count': 'Trip Count'
}).compute()
print(ctime(), dfg)
Example #13
class ProcessManager(GenericProcessManager):
    manager: "ProcessManager" = None

    @classmethod
    def getManager(cls) -> Optional["ProcessManager"]:
        return cls.manager

    @classmethod
    def initManager(cls, serverConfiguration: Dict[str,
                                                   str]) -> "ProcessManager":
        if cls.manager is None:
            cls.manager = ProcessManager(serverConfiguration)
        return cls.manager

    def __init__(self, serverConfiguration: Dict[str, str]):
        self.config = serverConfiguration
        self.logger = EDASLogger.getLogger()
        self.num_wps_requests = 0
        self.scheduler_address = serverConfiguration.get(
            "scheduler.address", None)
        self.submitters = []
        self.active = True
        if self.scheduler_address is not None:
            self.logger.info(
                "Initializing Dask-distributed cluster with scheduler address: "
                + self.scheduler_address)
            self.client = Client(self.scheduler_address, timeout=60)
        else:
            nWorkers = int(
                self.config.get("dask.nworkers", multiprocessing.cpu_count()))
            self.client = Client(LocalCluster(n_workers=nWorkers))
            self.scheduler_address = self.client.scheduler.address
            self.logger.info(
                f"Initializing Local Dask cluster with {nWorkers} workers,  scheduler address = {self.scheduler_address}"
            )
            self.client.submit(lambda x: edasOpManager.buildIndices(x),
                               nWorkers)
        self.ncores = self.client.ncores()
        self.logger.info(f" ncores: {self.ncores}")
        self.scheduler_info = self.client.scheduler_info()
        self.workers: Dict = self.scheduler_info.pop("workers")
        self.logger.info(f" workers: {self.workers}")
        log_metrics = serverConfiguration.get("log.scheduler.metrics", False)
        if log_metrics:
            self.metricsThread = Thread(target=self.trackMetrics)
            self.metricsThread.start()

    def getCWTMetrics(self) -> Dict:
        metrics_data = {
            key: {}
            for key in [
                'user_jobs_queued', 'user_jobs_running', 'wps_requests',
                'cpu_ave', 'cpu_count', 'memory_usage', 'memory_available'
            ]
        }
        metrics = self.getProfileData()
        counts = metrics["counts"]
        workers = metrics["workers"]
        for key in [
                'tasks', 'processing', 'released', 'memory', 'saturated',
                'waiting', 'waiting_data', 'unrunnable'
        ]:
            metrics_data['user_jobs_running'][key] = counts[key]
        for key in ['tasks', 'waiting', 'waiting_data', 'unrunnable']:
            metrics_data['user_jobs_queued'][key] = counts[key]
        for wId, wData in workers.items():
            worker_metrics = wData["metrics"]
            total_memory = wData["memory_limit"]
            memory_usage = worker_metrics["memory"]
            metrics_data['memory_usage'][wId] = memory_usage
            metrics_data['memory_available'][wId] = total_memory - memory_usage
            metrics_data['cpu_count'][wId] = wData["ncores"]
            metrics_data['cpu_ave'][wId] = worker_metrics["cpu"]
        return metrics_data

    def trackMetrics(self, sleepTime=1.0):
        isIdle = False
        self.logger.info(f" ** TRACKING METRICS ** ")
        while self.active:
            metrics = self.getProfileData()
            counts = metrics["counts"]
            if counts['processing'] == 0:
                if not isIdle:
                    self.logger.info(f" ** CLUSTER IS IDLE ** ")
                    isIdle = True
            else:
                isIdle = False
                self.logger.info(f" METRICS: {metrics['counts']} ")
                workers = metrics["workers"]
                for key, value in workers.items():
                    self.logger.info(f" *** {key}: {value}")
                self.logger.info(f" HEALTH: {self.getHealth()}")
                time.sleep(sleepTime)

    def getWorkerMetrics(self):
        metrics = {}
        wkeys = ['ncores', 'memory_limit', 'last_seen', 'metrics']
        scheduler_info = self.client.scheduler_info()
        workers: Dict = scheduler_info.get("workers", {})
        for iW, worker in enumerate(workers.values()):
            metrics[f"W{iW}"] = {wkey: worker[wkey] for wkey in wkeys}
        return metrics

    def getDashboardAddress(self):
        stoks = self.scheduler_address.split(":")
        host_address = stoks[-2].strip("/")
        return f"http://{host_address}:8787"

    def getCounts(self) -> Dict:
        profile_address = f"{self.getDashboardAddress()}/json/counts.json"
        return requests.get(profile_address).json()

    def getHealth(self, mtype: str = "") -> str:
        profile_address = f"{self.getDashboardAddress()}/health"
        return requests.get(profile_address).text

    def getMetrics(self, mtype: str = "") -> Optional[Dict]:
        counts = self.getCounts()
        if counts['processing'] == 0: return None
        mtypes = mtype.split(",")
        metrics = {"counts": counts}
        if "processing" in mtypes:
            metrics["processing"] = self.client.processing()
        if "profile" in mtypes: metrics["profile"] = self.client.profile()
        return metrics

    def getProfileData(self, mtype: str = "") -> Dict:
        try:
            return {
                "counts": self.getCounts(),
                "workers": self.getWorkerMetrics()
            }
        except Exception as err:
            self.logger.error("Error in getProfileData")
            self.logger.error(traceback.format_exc())

        # response2: requests.Response = requests.get(tasks_address)
        # print(f"\n  ---->  Tasks Data from {tasks_address}: \n **  {response2.text} ** \n" )
        # response3: requests.Response = requests.get(workers_address)
        # print(f"\n  ---->  Workers Data from {workers_address}: \n **  {response3.text} ** \n" )


#      data = json.loads(counts)

# (r"info/main/workers.html", Workers),
# (r"info/worker/(.*).html", Worker),
# (r"info/task/(.*).html", Task),
# (r"info/main/logs.html", Logs),
# (r"info/call-stacks/(.*).html", WorkerCallStacks),
# (r"info/call-stack/(.*).html", TaskCallStack),
# (r"info/logs/(.*).html", WorkerLogs),
# (r"json/counts.json", CountsJSON),
# (r"json/identity.json", IdentityJSON),
# (r"json/index.html", IndexJSON),
# (r"individual-plots.json", IndividualPlots),
# (r"metrics", PrometheusHandler),
# (r"health", HealthHandler),

# "/system": systemmonitor_doc,
# "/stealing": stealing_doc,
# "/workers": workers_doc,
# "/events": events_doc,
# "/counters": counters_doc,
# "/tasks": tasks_doc,
# "/status": status_doc,
# "/profile": profile_doc,
# "/profile-server": profile_server_doc,
# "/graph": graph_doc,
# "/individual-task-stream": individual_task_stream_doc,
# "/individual-progress": individual_progress_doc,
# "/individual-graph": individual_graph_doc,
# "/individual-profile": individual_profile_doc,
# "/individual-profile-server": individual_profile_server_doc,
# "/individual-nbytes": individual_nbytes_doc,
# "/individual-nprocessing": individual_nprocessing_doc,
# "/individual-workers": individual_workers_doc,

    def term(self):
        self.active = False
        self.client.close()

    def runProcess(self, job: Job) -> EDASDataset:
        start_time = time.time()
        try:
            self.logger.info(
                f"Running workflow for requestId: {job.requestId}, scheduler: {self.scheduler_address}"
            )
            result = edasOpManager.buildTask(job)
            self.logger.info("Completed EDAS workflow in time " +
                             str(time.time() - start_time))
            return result
        except Exception as err:
            self.logger.error("Execution error: " + str(err))
            traceback.print_exc()

    def submitProcess(self, service: str, job: Job,
                      resultHandler: ExecHandler):
        submitter: SubmissionThread = SubmissionThread(job, resultHandler)
        self.submitters.append(submitter)
        submitter.start()
Example #14
    'DoubleMuon': [
        'root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/Run2012B_DoubleMuParked.root',
        'root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/Run2012C_DoubleMuParked.root',
    ],
    'ZZ to 4mu': [
        'root://eospublic.cern.ch//eos/root-eos/cms_opendata_2012_nanoaod/ZZTo4mu.root'
    ]
}

exe_args = {
    'client': client,
    'savemetrics': True,
    'schema': processor.NanoEvents,
    'align_clusters': True
}

while len(client.ncores()) < 4:
    print('Waiting for more cores to spin up, currently there are {0} available...'.format(len(client.ncores())))
    print('Dask client info ->', client)
    time.sleep(10)

proc = MyProcessor()

hists = processor.run_uproot_job(fileset,
                                 treename="Events",
                                 processor_instance=proc,
                                 executor=processor.dask_executor,
                                 executor_args=exe_args)

print(hists)
Example #15
def open():
    scheduler = Client(schedulerAddress)
    #scheduler = Client('127.0.0.1:8786')
    print("Running on %d cores" % sum(scheduler.ncores().values()))
    return scheduler
                        default="ips.txt",
                        help="location of the nodes")
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()

    # setup the game
    game, agents, ratings = game_setup(args.num_agents)

    # run the game matches on the cluster
    nodes = get_nodes(args.ip_file)
    print("Connecting to cluster scheduler {} with workers:".format(nodes[0]))
    client = Client(nodes[0] + ':8786')
    for worker, cores in client.ncores().items():
        print("{:>35} {} cores".format(worker, cores))
    client.upload_file('game.py')

    start = default_timer()
    matches = run_games(game, agents, args.num_matches, client)
    check_status(matches)
    print("Game run in {:.2f}".format(default_timer() - start))

    # here we could do something with failed matches (errors)

    # run rating evaluations
    start = default_timer()
    compute_ratings(matches, ratings)
    print("Skills computed in {:.2f}".format(default_timer() - start))