Example #1
def gpu_info():
    """Return a list of namedtuples representing attributes of each GPU
    device.
    """

    GPUInfo = namedtuple('GPUInfo', ['name', 'driver', 'totalmem', 'freemem'])
    gpus = GPUtil.getGPUs()
    info = []
    for g in gpus:
        info.append(GPUInfo(g.name, g.driver, g.memoryTotal, g.memoryFree))
    return info
Example #2
def gpu_load(wproc=0.5, wmem=0.5):
    """Return a list of namedtuples representing the current load for
    each GPU device. The processor and memory loads are fractions
    between 0 and 1. The weighted load represents a weighted average
    of processor and memory loads using the parameters `wproc` and
    `wmem` respectively.
    """

    GPULoad = namedtuple('GPULoad', ['processor', 'memory', 'weighted'])
    gpus = GPUtil.getGPUs()
    load = []
    for g in gpus:
        wload = (wproc * g.load + wmem * g.memoryUtil) / (wproc + wmem)
        load.append(GPULoad(g.load, g.memoryUtil, wload))
    return load
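
A brief usage sketch for the two helpers above (a sketch only, assuming GPUtil is installed and at least one NVIDIA GPU is visible; the weights passed to gpu_load are arbitrary):

from collections import namedtuple  # required by gpu_info / gpu_load
import GPUtil

if __name__ == '__main__':
    for info, load in zip(gpu_info(), gpu_load(wproc=0.7, wmem=0.3)):
        # report free/total memory in MB and the weighted load fraction
        print('%s (driver %s): %.0f/%.0f MB free, weighted load %.2f' %
              (info.name, info.driver, info.freemem, info.totalmem, load.weighted))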
Example #3
def main():
    max_devices = 16
    # Check which devices we have locally
    available_devices = GPUtil.getAvailable(limit=max_devices)
    # Use one worker per device
    cluster = LocalCluster(n_workers=len(available_devices), threads_per_worker=4)
    client = Client(cluster)

    # Set up a relatively large regression problem
    n = 100
    m = 10000000
    partition_size = 100000
    X = da.random.random((m, n), partition_size)
    y = da.random.random(m, partition_size)

    xgb.dask.run(client, train, X, y, available_devices)
Example #4
def is_nvidia_gpu_present():
    try:
        import GPUtil
    except ImportError:  # py36 ModuleNotFoundError
        try:
            import gpu_dfcc
        except ImportError:  # py36 ModuleNotFoundError
            # who knows?
            return False
        else:
            return gpu_dfcc.cudaGetDeviceCount() > 0
    else:
        try:
            ngpu = len(GPUtil.getGPUs())
        except OSError:  # py3 FileNotFoundError
            # no `nvidia-smi`
            return False
        else:
            return ngpu > 0
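
A small usage sketch for the probe above (the messages are only illustrative):

if is_nvidia_gpu_present():
    print('NVIDIA GPU detected; enabling GPU code paths')
else:
    print('no usable NVIDIA GPU found; falling back to CPU')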
Example #5
'''
Created on 1 Mar 2018

@author: lbtanh
'''
# show GPU utilization:
import GPUtil
GPUtil.showUtilization()
Example #6
def run_evaluation_one_dataset(idx, area_ini, training_root_dir, template_dir):

    curr_dir = os.getcwd()

    run_eval_dir = os.path.basename(area_ini)[:-4] + '_%d' % idx
    main_para = 'main_para_eval_on_testData.ini'
    area_ini_name = os.path.basename(area_ini)

    if os.path.isdir(run_eval_dir) is False:
        io_function.mkdir(run_eval_dir)
        os.chdir(run_eval_dir)

        # copy and modify parameters
        io_function.copy_file_to_dst(os.path.join(template_dir, main_para),
                                     main_para)
        io_function.copy_file_to_dst(area_ini, area_ini_name)
        # set training_data_per=0 so that all the data are used for evaluation
        modify_parameter(main_para, 'training_regions', area_ini_name)
        io_function.copy_file_to_dst(
            os.path.join(template_dir, 'deeplabv3plus_xception65.ini'),
            'deeplabv3plus_xception65.ini')

        if 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:
            io_function.copy_file_to_dst(
                os.path.join(template_dir, 'exe_curc.sh'), 'exe_curc.sh')
            io_function.copy_file_to_dst(
                os.path.join(template_dir, 'run_INsingularity_curc_GPU_tf.sh'),
                'run_INsingularity_curc_GPU_tf.sh')
            io_function.copy_file_to_dst(
                os.path.join(template_dir, 'job_tf_GPU.sh'), 'job_tf_GPU.sh')

            job_name = 'eval_%d_area' % idx
            slurm_utility.modify_slurm_job_sh('job_tf_GPU.sh', 'job-name',
                                              job_name)
        else:
            # copy
            io_function.copy_file_to_dst(
                os.path.join(template_dir, 'exe_eval.sh'), 'exe_eval.sh')

    else:
        os.chdir(run_eval_dir)

    # if run in curc cluster
    if 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:

        while True:
            job_count = slurm_utility.get_submit_job_count(
                curc_username, job_name_substr='eval')
            if job_count >= max_run_jobs:
                print(
                    machine_name, datetime.now(),
                    'You have submitted %d or more jobs, wait ' % max_run_jobs)
                time.sleep(60)  #
                continue
            break

        # submit a job
        res = os.system('sbatch job_tf_GPU.sh')
        if res != 0:
            sys.exit(1)
    else:

        deviceIDs = []
        while True:
            # get available GPUs  # https://github.com/anderskm/gputil
            deviceIDs = GPUtil.getAvailable(order='memory',
                                            limit=100,
                                            maxLoad=0.5,
                                            maxMemory=0.5,
                                            includeNan=False,
                                            excludeID=[],
                                            excludeUUID=[])
            basic.outputlogMessage('deviceIDs: %s' % str(deviceIDs))
            if len(deviceIDs) < 1:
                time.sleep(
                    60)  # wait one minute, then check the available GPUs again
                continue
            break

        while True:
            job_count = basic.alive_process_count(local_tasks)
            if job_count >= max_run_jobs:
                print(
                    machine_name, datetime.now(),
                    '%d (>%d) jobs are running, wait ' %
                    (job_count, max_run_jobs))
                time.sleep(60)  #
                continue
            break

        job_sh = 'exe_eval.sh'
        gpuid = deviceIDs[0]
        # modify gpuid in exe_eval.sh
        with open(job_sh, 'r') as inputfile:
            list_of_all_the_lines = inputfile.readlines()
            for i in range(0, len(list_of_all_the_lines)):
                line = list_of_all_the_lines[i]
                if 'CUDA_VISIBLE_DEVICES' in line:
                    list_of_all_the_lines[
                        i] = 'export CUDA_VISIBLE_DEVICES=%d\n' % gpuid
                    print('Set %s' % list_of_all_the_lines[i])
            # write the new file and overwrite the old one
        with open(job_sh, 'w') as outputfile:
            outputfile.writelines(list_of_all_the_lines)

        # run
        sub_process = Process(target=run_exe_eval)
        sub_process.start()
        local_tasks.append(sub_process)

        # wait until the assigned GPU is in use or 100 seconds have elapsed
        t0 = time.time()
        while True:
            gpu_ids = GPUtil.getAvailable(order='memory',
                                          limit=100,
                                          maxLoad=0.5,
                                          maxMemory=0.5,
                                          includeNan=False,
                                          excludeID=[],
                                          excludeUUID=[])
            t1 = time.time()
            # print(gpu_ids, t1-t0)
            if len(gpu_ids) < 1 or gpu_ids[0] != gpuid or (t1 - t0) > 100:
                break
            else:
                time.sleep(0.5)

        if sub_process.exitcode is not None and sub_process.exitcode != 0:
            sys.exit(1)

    os.chdir(curr_dir)
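
The busy-wait on GPUtil.getAvailable above is a recurring pattern in this file; a compact, self-contained sketch of it (the helper name and default thresholds are our own):

import time
import GPUtil

def wait_for_free_gpu(check_interval=60, max_load=0.5, max_memory=0.5):
    """Poll until GPUtil reports at least one sufficiently idle GPU, then return its ID."""
    while True:
        device_ids = GPUtil.getAvailable(order='memory', limit=100,
                                         maxLoad=max_load, maxMemory=max_memory)
        if device_ids:
            return device_ids[0]
        time.sleep(check_interval)  # wait, then poll again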
Example #7
    def _run(self, _, frontend, sink, backend):
        # bind all sockets
        self.logger.info('bind all sockets')
        frontend.bind('tcp://*:%d' % self.port)
        addr_front2sink = auto_bind(sink)
        addr_backend = auto_bind(backend)

        # start the sink process
        self.logger.info('start the sink')
        proc_sink = BertSink(self.args, addr_front2sink)
        self.processes.append(proc_sink)
        proc_sink.start()
        addr_sink = sink.recv().decode('ascii')

        self.logger.info('get devices')
        run_on_gpu = False
        device_map = [-1] * self.num_worker
        if not self.args.cpu:
            try:
                import GPUtil
                num_all_gpu = len(GPUtil.getGPUs())
                avail_gpu = GPUtil.getAvailable(order='memory',
                                                limit=min(
                                                    num_all_gpu,
                                                    self.num_worker))
                num_avail_gpu = len(avail_gpu)

                if num_avail_gpu >= self.num_worker:
                    run_on_gpu = True
                elif 0 < num_avail_gpu < self.num_worker:
                    self.logger.warning(
                        'only %d out of %d GPU(s) is available/free, but "-num_worker=%d"'
                        % (num_avail_gpu, num_all_gpu, self.num_worker))
                    self.logger.warning(
                        'multiple workers will be allocated to one GPU, '
                        'may not scale well and may raise out-of-memory')
                    run_on_gpu = True
                else:
                    self.logger.warning('no GPU available, fall back to CPU')

                if run_on_gpu:
                    device_map = (avail_gpu *
                                  self.num_worker)[:self.num_worker]
            except FileNotFoundError:
                self.logger.warning(
                    'nvidia-smi is missing, often means no gpu on this machine. '
                    'fall back to cpu!')

        self.logger.info(
            'device map: \n\t\t%s' %
            '\n\t\t'.join('worker %2d -> %s' %
                          (w_id, ('gpu %2d' % g_id) if g_id >= 0 else 'cpu')
                          for w_id, g_id in enumerate(device_map)))

        # start the backend processes
        for idx, device_id in enumerate(device_map):
            process = BertWorker(idx, self.args, addr_backend, addr_sink,
                                 device_id, self.graph_path)
            self.processes.append(process)
            process.start()

        num_req = defaultdict(int)
        while True:
            try:
                request = frontend.recv_multipart()
                client, msg, req_id, msg_len = request
                if msg == ServerCommand.terminate:
                    break
                elif msg == ServerCommand.show_config:
                    num_req['config'] += 1
                    self.logger.info(
                        'new config request\treq id: %d\tclient: %s' %
                        (int(req_id), client))
                    status_runtime = {
                        'client': client.decode('ascii'),
                        'num_process': len(self.processes),
                        'ventilator -> worker': addr_backend,
                        'worker -> sink': addr_sink,
                        'ventilator <-> sink': addr_front2sink,
                        'server_current_time': str(datetime.now()),
                        'num_config_request': num_req['config'],
                        'num_data_request': num_req['data'],
                        'run_on_gpu': run_on_gpu
                    }

                    sink.send_multipart([
                        client, msg,
                        jsonapi.dumps({
                            **status_runtime,
                            **self.status_args,
                            **self.status_static
                        }), req_id
                    ])
                else:
                    num_req['data'] += 1
                    self.logger.info(
                        'new encode request\treq id: %d\tsize: %d\tclient: %s'
                        % (int(req_id), int(msg_len), client))
                    # register a new job at sink
                    sink.send_multipart(
                        [client, ServerCommand.new_job, msg_len, req_id])

                    job_id = client + b'#' + req_id
                    if int(msg_len) > self.max_batch_size:
                        seqs = jsonapi.loads(msg)
                        # partition the large batch into small batches
                        s_idx = 0
                        while s_idx < int(msg_len):
                            tmp = seqs[s_idx:(s_idx + self.max_batch_size)]
                            if tmp:
                                partial_job_id = job_id + b'@%d' % s_idx
                                backend.send_multipart(
                                    [partial_job_id,
                                     jsonapi.dumps(tmp)])
                            s_idx += len(tmp)
                    else:
                        backend.send_multipart([job_id, msg])
            except ValueError:
                self.logger.error(
                    'received a wrongly-formatted request (expected 4 frames, got %d)'
                    % len(request))
                self.logger.error('\n'.join('field %d: %s' % (idx, k)
                                            for idx, k in enumerate(request)))

        self.logger.info('terminated!')
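
The device map above cycles the available GPUs across workers whenever there are more workers than free GPUs; a standalone sketch with hypothetical values:

avail_gpu = [0, 2]   # hypothetical: GPUtil reported GPUs 0 and 2 as free
num_worker = 5
device_map = (avail_gpu * num_worker)[:num_worker]
print(device_map)    # [0, 2, 0, 2, 0] -> workers share GPUs when oversubscribed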
Example #8
    for iteration, (batch, c) in enumerate(tqdm.tqdm(dl)):
        with torch.no_grad():
            batch = batch.cuda()
            c = c.cuda()
            preds = model(batch)
            pred_cat.append(preds)
            c_cat.append(c)
    pred_cat = torch.cat(pred_cat, dim=0)
    c_cat = torch.cat(c_cat, dim=0)
    return auc_check(pred_cat, c_cat)


if __name__ == '__main__':

    if opt.cuda:
        base_gpu_list = GPUtil.getAvailable(order='memory', limit=8)
        if 5 in base_gpu_list:
            base_gpu_list.remove(5)
        base_gpu = base_gpu_list[0]
        cudnn.benchmark = True
    elif torch.cuda.is_available() and not opt.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    torch.cuda.set_device(base_gpu)
    for p in [3]:
        opt.dataset_index = p  # 0 = mnist, 1 = fashion, 2 = celeb
        perf_vals = []
        for seed in range(3):
            opt.epochs = epochs[opt.dataset_index]
            opt.channels = channels[opt.dataset_index]
Example #9
!mv pubfig83lfw_raw_in_dirs rephrase-pubfig831/correct
!rm -r rephrase-pubfig831/correct/distract
!cp -r rephrase-pubfig831/correct rephrase-pubfig831/degraded

for image_path in tqdm(glob('rephrase-pubfig831/degraded/*/*/*.jpg')):
  degrade(image_path)

"""# **Checking Free Memory**
This block just gives you an idea of the resources you have at hand on the Google Colab system.
"""

import psutil
import humanize
import os
import GPUtil as GPU
gpu = GPU.getGPUs()[0]
process = psutil.Process(os.getpid())
print(f"Gen RAM: Free {humanize.naturalsize(psutil.virtual_memory().available)} | Proc size {humanize.naturalsize(process.memory_info().rss)}")
print(f"GPU RAM: Free {gpu.memoryFree:.0f}MB | Used {gpu.memoryUsed:.0f}MB | Util {gpu.memoryUtil*100:.0f}% | Total {gpu.memoryTotal:.0f}MB")

!pip install tensorflow-gpu==2.0.0

import os
from glob import glob

import cv2
import numpy as np
from tqdm import tqdm

"""# **Main Code**
Example #10
# NOTE: First install bert-as-service via
# $
# $ pip install bert-serving-server
# $ pip install bert-serving-client
# $

# read and write TFRecord

import os

import GPUtil
import tensorflow as tf
from model_serving.client import bert_client

os.environ['CUDA_VISIBLE_DEVICES'] = str(GPUtil.getFirstAvailable()[0])
tf.logging.set_verbosity(tf.logging.INFO)

with open('README.md') as fp:
    data = [v for v in fp if v.strip()]
    bc = bert_client()
    list_vec = bc.encode(data)
    list_label = [0 for _ in data]  # a dummy list of all-zero labels

# write tfrecords

with tf.python_io.TFRecordWriter('tmp.tfrecord') as writer:

    def create_float_feature(values):
        return tf.train.Feature(float_list=tf.train.FloatList(value=values))
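    # A hedged continuation sketch (not necessarily the original script): write one
    # tf.train.Example per encoded vector; the feature keys 'features'/'label' are our own.
    for vec, label in zip(list_vec, list_label):
        features = tf.train.Features(feature={
            'features': create_float_feature([float(x) for x in vec]),
            'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
        })
        writer.write(tf.train.Example(features=features).SerializeToString())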
Example #11
    def start(self):
        self.running = True
        while self.running:
            gpu = GPUtil.getGPUs()[0]
            self.on_stats(gpu.load, gpu.memoryTotal, gpu.memoryUsed)
            time.sleep(self.period)
Example #12
    options = parser.parse_args()

    if not options.agent_profile:
        parser.error('Agent profile must be selected')

    if not options.agent_path:
        parser.error('Agent path must be selected')

    if not options.temp_path:
        parser.error('Out experience path must be selected')

    if not options.games_num:
        parser.error('Number of games must be selected')

    num_gpus = len(GPUtil.getGPUs())

    if num_gpus < 1:
        throw_error("Host does not have GPU! Aborting...")

    if num_gpus == 1:
        print(
            "Single-gpu machine detected, starting in the synchronous mode...")

        iteration_memory_path = options.temp_path + '/' + generate_unique_memory_name(
        )

        generate_self_play(options.agent_profile, options.agent_path,
                           options.games_num, iteration_memory_path,
                           options.max_steps, options.verbose, options.debug,
                           options.exploration_decay_steps)
Example #13
File: gpu.py Project: kintatta/d3rl
def get_gpu_count():
    return len(GPUtil.getGPUs())
Example #14
import GPUtil as GPU

GPUlist = GPU.getAvailable(order='first', limit=999)
a = 1
Example #15
def main(options, args):

    print(
        "%s : prediction using the trained model (run parallel if use multiple GPUs) "
        % os.path.basename(sys.argv[0]))
    machine_name = os.uname()[1]
    start_time = datetime.datetime.now()

    para_file = args[0]
    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in current folder: %s' %
                      (para_file, os.getcwd()))

    basic.setlogfile('parallel_predict_Log.txt')

    deeplab_inf_script = os.path.join(code_dir, 'deeplabBased',
                                      'deeplab_inference.py')
    network_setting_ini = parameters.get_string_parameters(
        para_file, 'network_setting_ini')

    global tf1x_python
    tf1x_python = parameters.get_file_path_parameters(network_setting_ini,
                                                      'tf1x_python')

    trained_model = options.trained_model

    outdir = parameters.get_directory(para_file, 'inf_output_dir')

    # remove previous results (let user remove this folder manually or in exe.sh folder)
    io_function.mkdir(outdir)

    # get name of inference areas
    multi_inf_regions = parameters.get_string_list_parameters(
        para_file, 'inference_regions')

    # max_parallel_inf_task = parameters.get_digit_parameters(para_file,'max_parallel_inf_task','int')

    b_use_multiGPUs = parameters.get_bool_parameters(para_file,
                                                     'b_use_multiGPUs')

    # loop each inference regions
    sub_tasks = []
    for area_idx, area_ini in enumerate(multi_inf_regions):

        area_name = parameters.get_string_parameters(area_ini, 'area_name')
        area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
        area_time = parameters.get_string_parameters(area_ini, 'area_time')

        inf_image_dir = parameters.get_directory(area_ini, 'inf_image_dir')

        # it is OK to pass a file name as a pattern to the following function to get the file list
        inf_image_or_pattern = parameters.get_string_parameters(
            area_ini, 'inf_image_or_pattern')

        inf_img_list = io_function.get_file_list_by_pattern(
            inf_image_dir, inf_image_or_pattern)
        img_count = len(inf_img_list)
        if img_count < 1:
            raise ValueError(
                'No image for inference, please check inf_image_dir and inf_image_or_pattern in %s'
                % area_ini)

        area_save_dir = os.path.join(
            outdir, area_name + '_' + area_remark + '_' + area_time)
        io_function.mkdir(area_save_dir)

        # parallel inference images for this area
        CUDA_VISIBLE_DEVICES = []
        if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
            CUDA_VISIBLE_DEVICES = [
                int(item.strip())
                for item in os.environ['CUDA_VISIBLE_DEVICES'].split(',')
            ]
        idx = 0
        while idx < img_count:

            if b_use_multiGPUs:
                # get available GPUs  # https://github.com/anderskm/gputil
                deviceIDs = GPUtil.getAvailable(order='first',
                                                limit=100,
                                                maxLoad=0.5,
                                                maxMemory=0.5,
                                                includeNan=False,
                                                excludeID=[],
                                                excludeUUID=[])
                # only use the one in CUDA_VISIBLE_DEVICES
                if len(CUDA_VISIBLE_DEVICES) > 0:
                    deviceIDs = [
                        item for item in deviceIDs
                        if item in CUDA_VISIBLE_DEVICES
                    ]
                    basic.outputlogMessage('on ' + machine_name +
                                           ', available GPUs:' +
                                           str(deviceIDs) +
                                           ', among visible ones:' +
                                           str(CUDA_VISIBLE_DEVICES))
                else:
                    basic.outputlogMessage('on ' + machine_name +
                                           ', available GPUs:' +
                                           str(deviceIDs))

                if len(deviceIDs) < 1:
                    time.sleep(
                        60
                    )  # wait one minute, then check the available GPUs again
                    continue
                # use only the first available visible GPU
                gpuid = deviceIDs[0]
                basic.outputlogMessage(
                    '%d: predict image %s on GPU %d of %s' %
                    (idx, inf_img_list[idx], gpuid, machine_name))
            else:
                gpuid = None
                basic.outputlogMessage('%d: predict image %s on %s' %
                                       (idx, inf_img_list[idx], machine_name))

            # run inference
            img_save_dir = os.path.join(area_save_dir, 'I%d' % idx)
            inf_list_file = os.path.join(area_save_dir, '%d.txt' % idx)

            # if it already exists, then skip
            if os.path.isdir(img_save_dir) and is_file_exist_in_folder(
                    img_save_dir):
                basic.outputlogMessage(
                    'folder of %dth image (%s) already exists, '
                    'it has been predicted or is being predicted' %
                    (idx, inf_img_list[idx]))
                idx += 1
                continue

            with open(inf_list_file, 'w') as inf_obj:
                inf_obj.writelines(inf_img_list[idx] + '\n')

            sub_process = Process(target=predict_one_image_deeplab,
                                  args=(deeplab_inf_script, para_file,
                                        network_setting_ini, img_save_dir,
                                        inf_list_file, gpuid, trained_model))
            sub_process.start()
            sub_tasks.append(sub_process)

            if b_use_multiGPUs is False:
                # wait until previous one finished
                while sub_process.is_alive():
                    time.sleep(5)

            idx += 1

            # wait until predicted image patches exist or 20 minutes have elapsed
            time0 = time.time()
            elapsed_time = time.time() - time0
            while elapsed_time < 20 * 60:
                elapsed_time = time.time() - time0
                file_exist = is_file_exist_in_folder(img_save_dir)
                if file_exist is True or sub_process.is_alive() is False:
                    break
                else:
                    time.sleep(5)

            if sub_process.exitcode is not None and sub_process.exitcode != 0:
                sys.exit(1)

            close_remove_completed_task(sub_tasks)
            # if 'chpc' in machine_name:
            #     time.sleep(60)  # wait 60 second on ITSC services
            # else:
            #     time.sleep(10)

    # wait until all the tasks have finished
    while b_all_task_finish(sub_tasks) is False:
        basic.outputlogMessage('waiting for all tasks to finish')
        time.sleep(60)
    close_remove_completed_task(sub_tasks)

    end_time = datetime.datetime.now()

    diff_time = end_time - start_time
    out_str = "%s: time cost of total parallel inference on %s: %d seconds" % (
        str(end_time), machine_name, diff_time.seconds)
    basic.outputlogMessage(out_str)
    with open("time_cost.txt", 'a') as t_obj:
        t_obj.writelines(out_str + '\n')
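
The intersection of GPUtil's free devices with CUDA_VISIBLE_DEVICES used above can be isolated into a short sketch (the environment value is whatever the scheduler set):

import os
import GPUtil

visible = [int(x) for x in os.environ.get('CUDA_VISIBLE_DEVICES', '').split(',') if x.strip()]
free = GPUtil.getAvailable(order='first', limit=100, maxLoad=0.5, maxMemory=0.5)
usable = [gpu_id for gpu_id in free if gpu_id in visible] if visible else free
print('usable GPUs:', usable)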
Example #16
def check_memory():
    """ Check usable system memory
    Warn the user if insufficient memory is available for
    the number of processes the user has chosen.
    """

    memory_status = []
    # get system available memory
    system_memory_available = psutil.virtual_memory().available / (1024**3)
    memory_status.append(('system', system_memory_available))

    # check if Nvidia-smi is available
    # GPUtil requires nvidia-smi.exe to interact with GPU
    if args.method in ['gpu', 'cudnn']:
        if not (shutil.which('nvidia-smi') or pathlib.Path(
                r'C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe').
                is_file()):
            # Nvidia System Management Interface not available
            Avalon.warning(
                'Nvidia-smi not available, skipping available memory check')
            Avalon.warning(
                'If you experience error \"cudaSuccess out of memory\", try reducing number of processes you\'re using'
            )
        else:
            with contextlib.suppress(ValueError):
                # "0" is GPU ID. Both waifu2x drivers use the first GPU available, therefore only 0 makes sense
                gpu_memory_available = (GPUtil.getGPUs()[0].memoryTotal -
                                        GPUtil.getGPUs()[0].memoryUsed) / 1024
                memory_status.append(('GPU', gpu_memory_available))

    # go through each checkable memory type and check availability
    for memory_type, memory_available in memory_status:

        if memory_type == 'system':
            mem_per_process = SYS_MEM_PER_PROCESS
        else:
            mem_per_process = GPU_MEM_PER_PROCESS

        # if the user doesn't have enough memory to run even one process
        if memory_available < mem_per_process:
            Avalon.warning(
                f'You might have an insufficient amount of {memory_type} memory available to run this program ({memory_available} GB)'
            )
            Avalon.warning('Proceed with caution')
            if args.processes > 1:
                if Avalon.ask('Reduce number of processes to avoid crashing?',
                              default=True,
                              batch=args.batch):
                    args.processes = 1
        # if memory available is less than needed, warn the user
        elif memory_available < (mem_per_process * args.processes):
            Avalon.warning(
                f'Each waifu2x-caffe process will require up to {SYS_MEM_PER_PROCESS} GB of system memory'
            )
            Avalon.warning(
                f'You demanded {args.processes} processes to be created, but you only have {round(memory_available, 4)} GB {memory_type} memory available'
            )
            Avalon.warning(
                f'{mem_per_process * args.processes} GB of {memory_type} memory is recommended for {args.processes} processes'
            )
            Avalon.warning(
                f'With your current amount of {memory_type} memory available, {int(memory_available // mem_per_process)} processes is recommended'
            )

            # ask the user if he / she wants to change to the recommended
            # number of processes
            if Avalon.ask('Change to the recommended value?',
                          default=True,
                          batch=args.batch):
                args.processes = int(memory_available // mem_per_process)
            else:
                Avalon.warning('Proceed with caution')
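
The recommendation above is plain integer division; a tiny sketch with hypothetical numbers:

memory_available = 11.5  # GB currently free (hypothetical)
mem_per_process = 3.5    # GB a single waifu2x-caffe process may need (hypothetical)
print(int(memory_available // mem_per_process))  # -> 3 recommended processes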
Example #17
def get_args():
    """
    Returns a namedtuple with arguments extracted from the command line.
    :return: A namedtuple with arguments
    """
    parser = argparse.ArgumentParser(
        description=
        'Welcome to the MLP course\'s Pytorch training and inference helper script'
    )

    parser.add_argument('--batch_size',
                        nargs="?",
                        type=int,
                        default=100,
                        help='Batch_size for experiment')
    parser.add_argument('--lstm_hidden_dim',
                        nargs="?",
                        type=int,
                        default=512,
                        help='Hidden_dim for LSTM')
    parser.add_argument('--lr',
                        nargs="?",
                        type=float,
                        default=0.01,
                        help='Learning rate')
    parser.add_argument('--encoder_output_size',
                        nargs="?",
                        type=int,
                        default=1024,
                        help='Size of the output of the encoder')
    parser.add_argument(
        '--fc1_size',
        nargs="?",
        type=int,
        default=512,
        help='Size of the output of the first layer of the siamese network')
    parser.add_argument(
        '--fc2_size',
        nargs="?",
        type=int,
        default=2048,
        help='Size of the output of the second layer of the siamese network')
    parser.add_argument('--model_name',
                        nargs="?",
                        type=str,
                        default="baseline",
                        help='Model for the experiment')
    parser.add_argument('--continue_from_epoch',
                        nargs="?",
                        type=int,
                        default=-1,
                        help='Epoch to continue training from')
    parser.add_argument(
        '--dataset_name',
        type=str,
        help='Dataset on which the system will train/eval our model')
    parser.add_argument(
        '--seed',
        nargs="?",
        type=int,
        default=7112018,
        help='Seed to use for random number generator for experiment')
    parser.add_argument('--num_layers',
                        nargs="?",
                        type=int,
                        default=4,
                        help='Number of LSTM layers')
    parser.add_argument('--num_epochs',
                        nargs="?",
                        type=int,
                        default=100,
                        help='The experiment\'s epoch budget')
    parser.add_argument('--dropout_rate',
                        nargs="?",
                        type=float,
                        default=0.0,
                        help='Dropout rate')
    parser.add_argument(
        '--experiment_name',
        nargs="?",
        type=str,
        default="exp_1",
        help='Experiment name - to be used for building the experiment folder')
    parser.add_argument(
        '--use_gpu',
        nargs="?",
        type=str2bool,
        default=False,
        help='A flag indicating whether we will use GPU acceleration or not')
    parser.add_argument('--gpu_id',
                        type=str,
                        default="None",
                        help="A string indicating the gpu to use")
    parser.add_argument('--weight_decay_coefficient',
                        nargs="?",
                        type=float,
                        default=1e-05,
                        help='Weight decay to use for Adam')
    parser.add_argument('--filepath_to_arguments_json_file',
                        nargs="?",
                        type=str,
                        default=None,
                        help='')

    args = parser.parse_args()
    gpu_id = str(args.gpu_id)
    if args.filepath_to_arguments_json_file is not None:
        args = extract_args_from_json(
            json_file_path=args.filepath_to_arguments_json_file,
            existing_args_dict=args)

    if gpu_id != "None":
        args.gpu_id = gpu_id

    arg_str = [(str(key), str(value)) for (key, value) in vars(args).items()]
    print(arg_str)

    if args.use_gpu:
        num_requested_gpus = len(args.gpu_id.split(","))
        num_received_gpus = len(
            GPUtil.getAvailable(order='first',
                                limit=8,
                                maxLoad=0.1,
                                maxMemory=0.1,
                                includeNan=False,
                                excludeID=[],
                                excludeUUID=[]))

        if num_requested_gpus == 1 and num_received_gpus > 1:
            print("Detected Slurm problem with GPUs, attempting automated fix")
            gpu_to_use = GPUtil.getAvailable(order='first',
                                             limit=num_received_gpus,
                                             maxLoad=0.1,
                                             maxMemory=0.1,
                                             includeNan=False,
                                             excludeID=[],
                                             excludeUUID=[])
            if len(gpu_to_use) > 0:
                os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_to_use[0])
                print("Using GPU with ID", gpu_to_use[0])
            else:
                print(
                    "Not enough GPUs available, please try on another node now, or retry on this node later"
                )
                sys.exit()

        elif num_requested_gpus > 1 and num_received_gpus > num_requested_gpus:
            print("Detected Slurm problem with GPUs, attempting automated fix")
            gpu_to_use = GPUtil.getAvailable(order='first',
                                             limit=num_received_gpus,
                                             maxLoad=0.1,
                                             maxMemory=0.1,
                                             includeNan=False,
                                             excludeID=[],
                                             excludeUUID=[])

            if len(gpu_to_use) >= num_requested_gpus:
                os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
                    str(gpu_idx)
                    for gpu_idx in gpu_to_use[:num_requested_gpus])
                print("Using GPU with ID", gpu_to_use[:num_requested_gpus])
            else:
                print(
                    "Not enough GPUs available, please try on another node now, or retry on this node later"
                )
                sys.exit()

    import torch
    args.use_cuda = torch.cuda.is_available()

    if torch.cuda.is_available():  # checks whether a CUDA GPU is available
        device = torch.cuda.current_device()
        print("use {} GPU(s)".format(torch.cuda.device_count()),
              file=sys.stderr)
    else:
        print("use CPU", file=sys.stderr)
        device = torch.device('cpu')  # sets the device to be CPU

    return args, device
Example #18
def available_gpu(*args, **kwargs):
    """This function is an alias for ``GPUtil.getAvailable``. If
    ``GPUtil`` is not installed, it returns [0,] as a default GPU ID."""

    return GPUtil.getAvailable(*args, **kwargs)
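
The body above shows only the GPUtil branch; a sketch of the import-guarded fallback the docstring describes could look like this (our own wording, not necessarily the project's actual code):

def available_gpu(*args, **kwargs):
    try:
        import GPUtil
    except ImportError:
        return [0]  # default GPU ID when GPUtil is not installed
    return GPUtil.getAvailable(*args, **kwargs)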
Example #19
    def get_remote_gpu():
        gpus = GPUtil.getGPUs()
        total_mem_mb = gpus[0].memoryTotal
        return total_mem_mb * BYTES_PER_MiB
Example #20
    ds = xr.open_dataset(MITGCM_filename)
    tr_start = 0
    tr_end = int(train_end_ratio * dataset_end_index)
    val_end = int(val_end_ratio * dataset_end_index)
    x_dim = (ds.isel(T=slice(0))).sizes['X']
    y_dim = (ds.isel(T=slice(0))).sizes['Y']
    z_dim = (ds.isel(T=slice(0))).sizes['Zld000038']
    ds.close()

    logging.info('Model ; ' + args.name + '\n')

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logging.info('Using device: ' + device + '\n')

    TimeCheck(tic, 'setting variables')
    logging.info(GPUtil.showUtilization())

    #-----------------------------------
    # Read in mean and std
    #-----------------------------------
    data_mean, data_std, data_range = ReadMeanStd(MeanStd_prefix)

    TimeCheck(tic, 'getting mean & std')
    logging.info(GPUtil.showUtilization())

    if args.dim == '2d':
        no_in_channels = args.histlen * (
            3 * z_dim + 1
        ) + 3 * z_dim + 1  # Eta field, plus Temp, U, V through depth, for each past time, plus masks
        no_out_channels = 3 * z_dim + 1  # Eta field, plus Temp, U, V through depth, just once
    elif args.dim == '3d':
Example #21
    def GetStaticStatsDict(self):  # Gets Static Stats And Puts Them Into A Dictionary #

        # Create New Dictionary #
        self.SystemHardware = {}

        # Get System Name Info #
        PlatformInfo = platform.uname()

        self.SystemHardware.update(
            {'OperatingSystemName': PlatformInfo.system})
        self.SystemHardware.update({'NodeName': PlatformInfo.node})
        self.NodeName = PlatformInfo.node
        self.SystemHardware.update(
            {'OperatingSystemRelease': PlatformInfo.release})
        self.SystemHardware.update(
            {'OperatingSystemVersion': PlatformInfo.version})

        # Get Last Boot Info #
        BootTimeInfo = psutil.boot_time()

        BootTimeDateTimeObject = datetime.datetime.fromtimestamp(BootTimeInfo)
        self.SystemHardware.update({
            'BootTimeDateString':
            f'{BootTimeDateTimeObject.year}/{BootTimeDateTimeObject.month}/{BootTimeDateTimeObject.day} {BootTimeDateTimeObject.hour}:{BootTimeDateTimeObject.minute}:{BootTimeDateTimeObject.second}'
        })

        # Get System CPU Info #
        CPUInfo = cpuinfo.get_cpu_info()

        self.SystemHardware.update(
            {'PythonVersion': CPUInfo.get('python_version')})
        self.SystemHardware.update(
            {'CPUInfoVersion': CPUInfo.get('cpuinfo_version_string')})
        self.SystemHardware.update({'CPUArchitecture': CPUInfo.get('arch')})
        self.SystemHardware.update({'CPUBits': CPUInfo.get('bits')})
        self.SystemHardware.update({'CPUThreads': CPUInfo.get('count')})
        self.SystemHardware.update(
            {'CPUCores': psutil.cpu_count(logical=False)})
        self.SystemHardware.update({'CPUVendor': CPUInfo.get('vendor_id_raw')})
        self.SystemHardware.update({'CPUName': CPUInfo.get('brand_raw')})
        self.SystemHardware.update(
            {'CPUBaseClock': CPUInfo.get('hz_advertized_friendly')})
        self.SystemHardware.update({'CPUInstructionSet': CPUInfo.get('flags')})
        self.SystemHardware.update(
            {'CPUL3CacheSize': CPUInfo.get('l3_cache_size')})
        self.SystemHardware.update(
            {'CPUL2CacheSize': CPUInfo.get('l2_cache_size')})
        self.SystemHardware.update(
            {'CPUL1CacheSize': CPUInfo.get('l1_cache_size')})

        # Get System Ram Info #
        RamInfo = psutil.virtual_memory()

        self.SystemHardware.update({'TotalSystemRAM': RamInfo.total})

        SwapInfo = psutil.swap_memory()

        self.SystemHardware.update({'TotalSystemSwap': SwapInfo.total})

        # Get System Disk Info #
        Partitions = psutil.disk_partitions()

        PartitionDevices = []
        PartitionMountPoints = []
        PartitionFileSystemType = []
        PartitionTotal = []
        PartitionUsed = []
        PartitionFree = []
        PartitionUsagePercent = []

        for Partition in Partitions:

            PartitionDevices.append(Partition.device)
            PartitionMountPoints.append(Partition.mountpoint)
            PartitionFileSystemType.append(Partition.fstype)

            try:
                PartitionUsage = psutil.disk_usage(Partition.mountpoint)
            except PermissionError:  # Catch Exception Thrown If Partition Is Unreadable #
                continue

            PartitionTotal.append(PartitionUsage.total)
            PartitionUsed.append(PartitionUsage.used)
            PartitionFree.append(PartitionUsage.free)
            PartitionUsagePercent.append(PartitionUsage.percent)

        self.SystemHardware.update({'PartitionDevices': PartitionDevices})
        self.SystemHardware.update(
            {'PartitionMountPoints': PartitionMountPoints})
        self.SystemHardware.update(
            {'PartitionFileSystemType': PartitionFileSystemType})
        self.SystemHardware.update({'PartitionTotal': PartitionTotal})
        self.SystemHardware.update({'PartitionUsed': PartitionUsed})
        self.SystemHardware.update({'PartitionFree': PartitionFree})
        self.SystemHardware.update(
            {'PartitionUsagePercent': PartitionUsagePercent})

        # Get Network Info #
        NetNames = []
        NetAddresses = []
        NetMasks = []
        NetBroadcasts = []

        IFAddresses = psutil.net_if_addrs()

        for InterfaceName, InterfaceAddresses in IFAddresses.items():
            for Address in InterfaceAddresses:
                NetNames.append(InterfaceName)
                NetAddresses.append(Address.address)
                NetMasks.append(Address.netmask)
                NetBroadcasts.append(Address.broadcast)

        self.SystemHardware.update({'NetNames': NetNames})
        self.SystemHardware.update({'NetAddresses': NetAddresses})
        self.SystemHardware.update({'NetMasks': NetMasks})
        self.SystemHardware.update({'NetBroadcasts': NetBroadcasts})

        # GPU Info #
        GPUIds = []
        GPUNames = []
        GPUTotalMemory = []

        GPUs = GPUtil.getGPUs()

        for GPU in GPUs:

            GPUIds.append(GPU.id)
            GPUNames.append(GPU.name)
            GPUTotalMemory.append(GPU.memoryTotal)

        self.SystemHardware.update({'GPUIds': GPUIds})
        self.SystemHardware.update({'GPUNames': GPUNames})
        self.SystemHardware.update({'GPUTotalMemory': GPUTotalMemory})
Example #22
def get_csv_output(executable, playouts, weights, communicate_string):
    """Primary function - first three parameters build the basic setup command, the latter two are used to run the CLI and generate the output CSV"""

    # Extra argument to add at the end
    final_args = "--noponder"

    # If the machine is Linux-based, add the folders to the paths; if it is Windows, just change the current working directory
    if os.name == 'posix':
        executable = "./leela-zero-0.17/" + executable
        weights = "./leela-zero-0.17/" + weights
    else:
        os.chdir('./leela-zero-0.17')

    # Check if the user's computer has one or more GPUs - if not, set it to only use CPUs
    if not GPUtil.getGPUs():
        final_args += " --cpu-only"

    # Key command - configure the actual Leela Zero run string and print it out on-screen for ease of testing
    run_string = "{} -g -r 0 -d -p {} -w {} {}".format(executable, playouts,
                                                       weights, final_args)
    print(run_string)

    # On Windows, use wexpect, on Linux, use pexpect. Slightly different commands for each to begin Leela Zero
    if os.name == 'nt':
        child = wexpect.spawn('cmd.exe')
        child.expect('>', timeout=120)
        child.sendline(run_string)
    else:
        child = pexpect.spawn('/bin/bash -c "{}"'.format(run_string))
    child.expect('Setting max tree', timeout=120)

    # Once Leela Zero is loaded, we definitely want these three commands run first and foremost
    starting_commands = ["boardsize 19", "clear_board", "komi 7.5"]
    for command in starting_commands:
        child.sendline(command)
        child.expect(
            '=', timeout=120
        )  # Basic Leela Zero commands always end with a '=' on success (not including lz-analyze)

    # Convert our giant string of commands into a list of commands
    communicate_string_list = communicate_string.split("\n")

    # Output the full communicate_string to command_log.log for further debug review as desired
    with open("command_log.log", "w") as my_file:
        my_file.write("\n".join(communicate_string_list))

    # Set a basic counter for the current move number
    y = 0

    # all_moves will eventually become our final dataframe
    all_moves = []

    # Initiate the progress bar
    bar = pb.ProgressBar()
    colors = ['white', 'black']

    # At long last, execute our strings line-by-line. Do it three-by-three since each move has three associated commands (2x 'lz-analyze' plus 'play')
    for x in bar(range(0, len(communicate_string_list), 3)):
        y += 1
        # If the game is going longer than 180 moves, we can exit Leela Zero
        if y == 181:
            break

        # Extract the human's move from the 'play <color> <coordinate>' command
        human_move = communicate_string_list[x + 2].split(" ")[2]

        # Send the primary lz-analyze command to Leela Zero; 'max depth' appears at the end of Leela Zero's output
        child.sendline(communicate_string_list[x])
        child.expect(" max depth", timeout=120)

        # Only extract those lines of text that have actual moves in them with the key '->' substring.
        # Windows can just split it immediately, but Linux machines require the string to be decoded first.
        if os.name == 'nt':
            before_text = [
                line.strip() for line in child.before.split("\n")
                if "->" in line
            ]
        else:
            before_text = [
                line.strip()
                for line in child.before.decode("utf-8").split("\n")
                if "->" in line
            ]

        # The first line will be the move with the highest LCB winrate, which is what Leela thinks is the "best" option
        ai_first_choice_move = before_text[0]

        # Extract move coordinates and other values from the line of text
        ai_move_coords = ai_first_choice_move.split("->")[0].strip().lower()
        ai_v_value = ai_first_choice_move.split("(V: ")[1].split("%")[0]
        ai_n_value = ai_first_choice_move.split("(N: ")[1].split("%")[0]
        ai_lcb_value = ai_first_choice_move.split("(LCB: ")[1].split("%")[0]

        global b_player
        global w_player

        if colors[y % 2] == 'black':
            player = b_player
        else:
            player = w_player
        # Begin construction of move_info, i.e. one row of data in our output spreadsheet
        move_info = {
            'move_number': y,
            'ai_move': ai_move_coords,
            'ai_v_value': ai_v_value,
            'ai_n_value': ai_n_value,
            'ai_lcb_value': ai_lcb_value,
            'human_move': human_move,
            'color': colors[y % 2],
            'player': player
        }

        # As a default, assume the human's move was NOT one of those identified by Leela Zero. Also extract all 10 moves into a pretty list.
        is_match_found = False
        top_10_moves = extract_top_10_moves(before_text)

        # Go through each move that Leela Zero looked at, checking if any were the human's move. If so, update move_info accordingly
        for top_10_move in top_10_moves:
            if top_10_move['move_coord'] == human_move:
                move_info['is_requery_needed'] = 0
                move_info['human_v_value'] = top_10_move['v_value']
                move_info['human_n_value'] = top_10_move['n_value']
                move_info['human_lcb_value'] = top_10_move['lcb_value']
                is_match_found = True
                break

        # However, if the human's move is NOT found among the top moves that Leela Zero looked at...
        if not is_match_found:
            human_command = communicate_string_list[x + 1]

            # Still setting is_requery_needed to zero - only set to 1 if this second attempt fails
            move_info['is_requery_needed'] = 0

            # Sort the top 10 moves in ascending order by n_value. Then generate a list of allowed_moves containing the human's move and the
            # other 9 Leela Zero moves that didn't have the lowest n-value.
            sorted_top_10 = sorted(top_10_moves, key=lambda i: i['n_value'])
            allowed_moves = human_move
            lowest_n = sorted_top_10[0][
                'n_value']  # Save this value for possible use later
            for top_10_move in sorted_top_10[2:]:
                allowed_moves += "," + top_10_move['move_coord']

            # Replace the "__" placeholder in this command with our new list - this will (theoretically) ensure that Leela gives the move proper attention
            human_command = human_command.replace("__", allowed_moves)
            child.sendline(human_command)
            child.expect(" max depth", timeout=120)

            # Same as before - eventually I should abstract this into a single function since I'm doing the same thing twice
            if os.name == 'nt':
                before_text = [
                    line.strip() for line in child.before.split("\n")
                    if "->" in line
                ]
            else:
                before_text = [
                    line.strip()
                    for line in child.before.decode("utf-8").split("\n")
                    if "->" in line
                ]
            top_10_moves = extract_top_10_moves(before_text)

            is_match_found = False

            # So in theory, it should always have the move now. However, it doesn't *always* (about 95% of the time it does).
            # You'd know better than me precisely why.
            for top_10_move in top_10_moves:
                if top_10_move['move_coord'] == human_move:
                    move_info['human_v_value'] = top_10_move['v_value']
                    move_info['human_n_value'] = top_10_move['n_value']
                    move_info['human_lcb_value'] = top_10_move['lcb_value']
                    is_match_found = True
                    break

            # Finally, we now absolutely *force* Leela Zero to give us the V and LCB values by re-running the previous command
            # but with only a single allowable move on the entire board - the move we want it to consider.
            # The downside, however, is that the N value is lost. Since only one move is allowable, the N value becomes about 99.96% or so.
            # To mitigate this, I just give this human move an n-value equal to the lowest N-value from the top 10 moves Leela Zero considered.
            # This is admittedly not an ideal solution, open to better ideas?
            if not is_match_found:
                human_command = human_command.replace(allowed_moves,
                                                      human_move)
                child.sendline(human_command)
                child.expect(" max depth", timeout=120)
                before_text = [
                    line.strip() for line in child.before.split("\n")
                    if "->" in line
                ]
                top_10_moves = extract_top_10_moves(before_text)
                move_info['human_v_value'] = top_10_moves[0]['v_value']
                move_info['human_n_value'] = lowest_n
                move_info['human_lcb_value'] = top_10_moves[0]['lcb_value']
                move_info['is_requery_needed'] = 1

        # Add the "row" of data to the all_moves list
        all_moves.append(move_info)

        # Execute the 3rd command - very simple, just play the human's move on the board
        child.sendline(communicate_string_list[x + 2])
        child.expect('=', timeout=120)
    child.sendline('exit')

    # Generate the dataframe, organize the columns, and return the finished dataframe
    df = pd.DataFrame(all_moves)
    column_order = [
        "move_number", "color", "human_move", "ai_move", "human_v_value",
        "ai_v_value", "human_n_value", "ai_n_value", "human_lcb_value",
        "ai_lcb_value", 'is_requery_needed', 'player'
    ]
    df = df[column_order]
    return df
Example #23
async def get_gpu_state(request):
    temps = []
    for gpu in GPUtil.getGPUs():
        temps.append(float(gpu.temperature))
    return web.Response(text=str(max(temps)), content_type="text/html")
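
A minimal sketch of serving the handler above with aiohttp (the route path and port are our own choices):

from aiohttp import web
import GPUtil

app = web.Application()
app.add_routes([web.get('/gpu_temperature', get_gpu_state)])
# web.run_app(app, port=8080)  # uncomment to start the server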
Example #24
while True:
    count = count + 1
    ser = serial.Serial(DEVICE, 115200)  # open serial port

    # CPU Stats
    cpu_percent = str(
        psutil.cpu_percent(interval=1))  # Get CPU percent (Takes 1 second)
    cpu_temp = "..."  # Haven't figured out how to get CPU temp yet

    # RAM Stats
    ram_percent = str(psutil.virtual_memory().percent)
    ram_used = str(round(psutil.virtual_memory().used / (2**30), 1))
    ram_total = str(round(psutil.virtual_memory().total / (2**30), 1))

    # Get GPU Stats (Windows Only)
    if GPUtil.getGPUs():
        gpu_percent = f"{round(GPUtil.getGPUs()[0].load*100, 2)}"
        gpu_temp = f"{GPUtil.getGPUs()[0].temperature}"
    else:
        gpu_percent = ""
        gpu_temp = ""

    # Get Disk Stats
    storage_total, storage_used, storage_free = shutil.disk_usage("/")
    disk_percent = str(
        round((storage_used / (2**30)) / (storage_total / (2**30)) * 100, 1))
    disk_total = str(round(storage_total / (2**30)))
    disk_used = str(round(storage_used / (2**30)))

    # Create JSON Data
    command = '{"cpu":{"percent": "' + cpu_percent + '", "temp": "' + cpu_temp + '"},"ram":{"percent": "' + ram_percent + '", "used":"' + ram_used + '", "total":"' + ram_total + '"},"gpu":{"percent":"' + gpu_percent + '", "temp":"' + gpu_temp + '"},"disk":{"percent":"' + disk_percent + '","used":"' + disk_used + '","total":"' + disk_total + '"}}\n\r'
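
The same payload can be built with json.dumps instead of manual string concatenation; a sketch reusing the variables computed above:

import json

command = json.dumps({
    'cpu': {'percent': cpu_percent, 'temp': cpu_temp},
    'ram': {'percent': ram_percent, 'used': ram_used, 'total': ram_total},
    'gpu': {'percent': gpu_percent, 'temp': gpu_temp},
    'disk': {'percent': disk_percent, 'used': disk_used, 'total': disk_total},
}) + '\n\r'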
Example #25
def model(train_x, train_y, dev_x, dev_y, test_x, test_y, overal_maxlen, qwks):
    from keras.models import Sequential
    from keras.layers import Dense, Dropout, Activation, GlobalAveragePooling1D
    from keras.layers.embeddings import Embedding
    from keras.layers.recurrent import LSTM
    from keras.initializers import Constant
    from keras import optimizers
    import keras.backend as K
    from deepats.my_layers import MeanOverTime
    from deepats.rwa import RWA
    import pickle as pk
    import numpy as np
    import string
    import random
    import os
    from deepats.optimizers import get_optimizer

    from deepats.ets_evaluator import Evaluator
    import deepats.ets_reader as dataset
    from deepats.ets_config import get_args
    import GPUtil

    def random_id(size=6, chars=string.ascii_uppercase + string.digits):
        return ''.join(random.choice(chars) for _ in range(size))

    def kappa_metric(t, x):
        u = 0.5 * K.sum(K.square(x - t))
        v = K.dot(K.transpose(x), t - K.mean(t))
        return v / (v + u)

    def kappa_loss(t, x):
        u = K.sum(K.square(x - t))
        v = K.dot(K.squeeze(x, 1), K.squeeze(t - K.mean(t), 1))
        return u / (2 * v + u)

    import time
    ms = int(round(time.time() * 1000))
    rand_seed = ms % (2**32 - 1)
    random.seed(rand_seed)

    args = get_args()
    model_id = random_id()

    abs_vocab_file = os.path.join(args.abs_out, 'vocab.pkl')
    with open(abs_vocab_file, 'rb') as vocab_file:
        vocab = pk.load(vocab_file)
    vocab_size = len(vocab)

    acts = ['tanh', 'relu', 'hard_sigmoid']
    emb_dim = {{choice([50, 100, 200, 300])}}
    rnn_dim = {{uniform(50, 500)}}
    rnn_dim = int(rnn_dim)
    rec_act = {{choice([0, 1, 2])}}
    rec_act = acts[rec_act]
    dropout = {{uniform(0.2, 0.95)}}

    epochs = args.epochs
    n_emb = vocab_size * emb_dim
    n_rwa = (903 + 2 * rnn_dim) * rnn_dim
    n_tot = n_emb + n_rwa + rnn_dim + 1

    lr = {{lognormal(-3 * 2.3, .8)}}
    lr = 1.5 * lr
    rho = {{normal(.875, .04)}}
    clipnorm = {{uniform(1, 15)}}
    eps = {{loguniform(-8 * 2.3, -5 * 2.3)}}

    opt = optimizers.RMSprop(lr=lr, rho=rho, clipnorm=clipnorm, epsilon=eps)
    loss = kappa_loss
    metric = kappa_metric

    evl = Evaluator(dataset,
                    args.prompt_id,
                    args.abs_out,
                    dev_x,
                    test_x,
                    dev_df,
                    test_df,
                    model_id=model_id)

    train_y_mean = train_y.mean(axis=0)
    if train_y_mean.ndim == 0:
        train_y_mean = np.expand_dims(train_y_mean, axis=1)
    num_outputs = len(train_y_mean)

    mask_zero = False

    model = Sequential()
    model.add(Embedding(vocab_size, emb_dim, mask_zero=mask_zero))
    model.add(RWA(rnn_dim, recurrent_activation=rec_act))
    model.add(Dropout(dropout))
    bias_value = (np.log(train_y_mean) - np.log(1 - train_y_mean)).astype(
        K.floatx())
    model.add(Dense(num_outputs, bias_initializer=Constant(value=bias_value)))
    model.add(Activation('tanh'))
    model.emb_index = 0

    from deepats.w2vEmbReader import W2VEmbReader as EmbReader
    emb_reader = EmbReader(args.emb_path, emb_dim)
    emb_reader.load_embeddings(vocab)
    emb_wts = emb_reader.get_emb_matrix_given_vocab(
        vocab, model.layers[model.emb_index].get_weights()[0])
    wts = model.layers[model.emb_index].get_weights()
    wts[0] = emb_wts
    model.layers[model.emb_index].set_weights(wts)

    model.compile(loss=loss, optimizer=opt, metrics=[metric])
    model_yaml = model.to_yaml()

    # GPUtil itself does not expose avail_mem(); approximate the intended check with
    # the free-memory fraction of the first GPU and skip the trial if it is below 10%.
    gpus = GPUtil.getGPUs()
    if gpus and gpus[0].memoryFree / gpus[0].memoryTotal < 0.1:
        return {'loss': 1, 'status': STATUS_OK, 'model': '', 'weights': None}

    print('model_id: %s' % (model_id))
    print(model_yaml)
    print('PARAMS\t\
    %s\t\
    lr= %.4f\t\
    rho= %.4f\t\
    clip= %.4f\t\
    eps= %.4f\t\
    embDim= %.4f\t\
    rnnDim= %.4f\t\
    drop= %.4f\t\
    recAct= %s' % (model_id, lr, rho, clipnorm, np.log(eps) / 2.3, emb_dim,
                   rnn_dim, dropout, rec_act))

    for i in range(epochs):
        train_history = model.fit(train_x,
                                  train_y,
                                  batch_size=args.batch_size,
                                  epochs=1,
                                  verbose=0)
        evl.evaluate(model, i)
        evl.output_info()

        p = evl.stats[3] / qwks[0]
        if i > 10 and p < 0.9:
            break

    i = evl.comp_idx
    j = i + 2
    best_dev_kappa = evl.best_dev[i]
    best_test_kappa = evl.best_dev[j]

    print('Best dev kappa:', best_dev_kappa)
    print('Best test kappa:', best_test_kappa)
    return {
        'loss': 1 - best_dev_kappa,
        'status': STATUS_OK,
        'model': model.to_yaml(),
        'weights': pk.dumps(model.get_weights())
    }
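
# Hedged usage sketch (not from the original source): a model() function written in
# this style (double-brace hyperas search templates plus a return dict carrying
# 'loss'/'status'/'model') is normally driven by hyperas' optimizer. `data` is a
# hypothetical loader returning the arguments model() expects.
#
#     from hyperas import optim
#     from hyperopt import tpe, Trials
#
#     best_run, best_model = optim.minimize(model=model, data=data,
#                                           algo=tpe.suggest, max_evals=25,
#                                           trials=Trials())
#     print(best_run)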
Example #26
0
import psutil

try:
    # get network IO statistics since boot
    # (get_size is assumed to be a bytes-to-human-readable helper defined elsewhere)
    net_io = psutil.net_io_counters()
    print(f"Total Bytes Sent: {get_size(net_io.bytes_sent)}")
    print(f"Total Bytes Received: {get_size(net_io.bytes_recv)}")
except Exception as e:
    print(e)

# GPU information
import GPUtil
from tabulate import tabulate

try:
    print("=" * 40, "GPU Details", "=" * 40)
    gpus = GPUtil.getGPUs()
    list_gpus = []
    for gpu in gpus:
        # get the GPU id
        gpu_id = gpu.id
        # name of GPU
        gpu_name = gpu.name
        # get % percentage of GPU usage of that GPU
        gpu_load = f"{gpu.load*100}%"
        # get free memory in MB format
        gpu_free_memory = f"{gpu.memoryFree}MB"
        # get used memory
        gpu_used_memory = f"{gpu.memoryUsed}MB"
        # get total memory
        gpu_total_memory = f"{gpu.memoryTotal}MB"
        # get GPU temperature in Celsius
        gpu_temperature = f"{gpu.temperature} °C"
        # collect the row and render the table (header labels are illustrative)
        list_gpus.append((gpu_id, gpu_name, gpu_load, gpu_free_memory,
                          gpu_used_memory, gpu_total_memory, gpu_temperature))
    print(tabulate(list_gpus, headers=("id", "name", "load", "free memory",
                                       "used memory", "total memory",
                                       "temperature")))
except Exception as e:
    print(e)
Example #27
0
                      default.val_vis, default.val_shuffle,
                      default.val_has_rpn, default.proposal,
                      default.val_max_box, default.val_thresh)

    prop_file = 'proposals_%s_%s.mat' % (default.test_image_set, default.exp_name)
    savemat(prop_file, default.res_dict)
    default.testing = False

if __name__ == '__main__':
    config_file = cfg_from_file('config.yml')
    merge_a_into_b(config_file, config)
    config.NUM_ANCHORS = len(config.ANCHOR_SCALES) * len(config.ANCHOR_RATIOS)

    default_file = cfg_from_file('default.yml')
    merge_a_into_b(default_file, default)
    default.e2e_prefix = 'model/' + default.exp_name

    if default.gpus == '':  # auto select
        import GPUtil
        deviceIDs = GPUtil.getAvailable(order='lowest', limit=1, maxLoad=0.5, maxMemory=0.5)
        GPUs = GPUtil.getGPUs()
        default.gpus = str(len(GPUs)-1-deviceIDs[0])
        logger.info('using gpu '+default.gpus)
    default.val_gpu = default.gpus
    default.prefetch_thread_num = min(default.prefetch_thread_num, config.TRAIN.SAMPLES_PER_BATCH)

    print(config)
    print(default)

    test_net(default.e2e_prefix, default.begin_epoch)
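
    # A common follow-up (not part of this example) is to pin the process to the
    # selected device through the standard CUDA environment variable before any
    # GPU work starts, e.g.:
    #     os.environ['CUDA_VISIBLE_DEVICES'] = default.gpus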
Example #28
0
    def __init__(
        self,
        models: List[nn.Module],
        model_connection: Dict[Tuple[int, int], int],
        devices: List[Union[t.device, str]] = None,
        model_size_multiplier=2,
        max_mem_ratio=0.5,
        cpu_weight=0,
        connection_weight=2,
        size_match_weight=1e-2,
        complexity_match_weight=1,
        entropy_weight=1,
        iterations=500,
        update_rate=0.01,
        gpu_gpu_distance=1,
        cpu_gpu_distance=10,
        move_models=True,
    ):
        """
        Assign models to different devices, within the scope of a single
        process. The assigner assumes all GPUs have the **same processing
        power**.

        Assignment is based on four aspects:

        1. Distance and model connections. Connection is usually indicated
           by the amount of data transmitted between two models.
        2. Compute complexity.
        3. Model size.
        4. Entropy.

        Four aspects are controlled by four weights:

        1. ``connection_weight``, the assigner will try to reduce the total
           ``distance * connection`` if this weight is larger.
        2. ``size_match_weight``, this weight controls the total memory
           used on a single device; it only takes effect when the total
           assigned model memory exceeds the allowed device memory size
           (internally it uses a relu activation). The larger it is, the
           tighter and more restricted the fit.
        3. ``complexity_match_weight``, this weight balances the model
           computation cost across devices; the assigner will try to even
           out the ``computation cost / compute power`` ratio for each
           device if this weight is larger.
        4. ``entropy_weight``, this weight minimizes the uncertainty of
           the model placement probability, so ``model i`` will have a
           probability close to 1 of locating on some ``device j`` if this
           weight is larger.

        Assignment uses gradient descent to compute the probability matrix
        of each ``model i`` locating on each available ``device j`` (a short
        usage sketch is appended after this example).

        See Also:
            :class:`.ModelSizeEstimator`

        Note:
            When the total size of your models is very close to the capacity
            of your device memory, `ModelAssigner` does not respond very well
            to ``size_match_weight``; in that case, consider increasing
            ``model_size_multiplier`` or decreasing ``max_mem_ratio``.

        Args:
            models: Models to assign.
            model_connection: Connection weight between modules.
                **Must be positive**
            devices: Available devices.
            model_size_multiplier: Size multiplier of models, used to reserve
                enough space for each model.
            max_mem_ratio: Maximum fraction of device memory allowed.
            cpu_weight: Weight of the CPU, relative to the computing power of
                one GPU. By default it is 0, so no computation is performed on
                the CPU. **Must be positive**
            connection_weight: Weight of connection between models.
            size_match_weight: Weight of size match.
            complexity_match_weight: Weight of complexity match.
            entropy_weight: Weight of entropy.
            iterations: Number of optimization iterations.
            update_rate: Learning rate of the adam optimizer.
            gpu_gpu_distance: Estimated distance cost between gpu-gpu.
                **Must be positive**
            cpu_gpu_distance: Estimated distance cost between cpu-gpu.
                **Must be positive**
            move_models: Whether to automatically move the models after
                assignment.
        """
        if devices is None:
            devices = [
                t.device(type="cuda", index=i)
                for i in GPUtil.getAvailable(order="load")
            ]
        else:
            devices = [t.device(d) for d in devices]
            available_devices = [
                t.device(type="cuda", index=i)
                for i in GPUtil.getAvailable(order="load")
            ]
            used_devices = []
            for dev in devices:
                if dev.type == "cuda" and dev not in available_devices:
                    default_logger.info(
                        f"Warning: device {dev} not available, removed.")
                else:
                    used_devices.append(dev)
            devices = used_devices

        if not devices:
            devices = [t.device("cpu")]

        default_logger.info(f"Using these devices: {devices}")

        sizes = [
            ModelSizeEstimator(model, model_size_multiplier).estimate_size()
            for model in models
        ]
        device_size_capacity = []
        device_complexity_capacity = []

        gpus = GPUtil.getGPUs()
        for dev in devices:
            if dev.type == "cpu":
                device_size_capacity.append(
                    int(psutil.virtual_memory().available / 1024**2) *
                    max_mem_ratio)
                device_complexity_capacity.append(cpu_weight)
            elif dev.type == "cuda":
                device_size_capacity.append(gpus[dev.index].memoryFree *
                                            max_mem_ratio)
                device_complexity_capacity.append(1 - gpus[dev.index].load)

        if np.sum(np.array(sizes)) > np.sum(device_size_capacity):
            raise RuntimeError(
                f"Estimated model will use {np.sum(np.array(sizes)):.2f} MB, "
                f"but only have {np.sum(device_size_capacity):.2f} MB allowed memory "
                "in total.")

        # assign models to devices
        # using a heuristic and gradient descent
        device_num = len(devices)
        model_num = len(models)

        # Important: the placement probability matrix. Entry [i, j] describes
        # the probability of placing model i on device j.
        placement = t.randn([model_num, device_num], requires_grad=True)

        optimizer = t.optim.Adam([placement], lr=update_rate)
        model_size = t.tensor(sizes, dtype=t.float).view([1, model_num])
        size_capacity = t.tensor(device_size_capacity,
                                 dtype=t.float).view([1, device_num])
        model_complexity = model_size

        # complexity_capacity is basically the estimated computing power
        # of devices.
        complexity_capacity = t.tensor(device_complexity_capacity,
                                       dtype=t.float).view([1, device_num])

        # model connection indicates the amount of data transmitted between
        # each pair of models, a weighted adjacency matrix.
        model_conn = t.zeros([model_num, model_num])

        for direction, conn in model_connection.items():
            model_conn[direction[0], direction[1]] = conn

        # device distance matrix
        device_distance = t.zeros([device_num, device_num])
        for i in range(device_num):
            for j in range(i):
                if (devices[i].type == "cpu" and devices[j].type == "cuda"
                        or devices[i].type == "cuda"
                        and devices[j].type == "cpu"):
                    device_distance[i,
                                    j] = device_distance[j,
                                                         i] = cpu_gpu_distance
                elif (devices[i].type == "cuda" and devices[j].type == "cuda"
                      and devices[i].index != devices[j].index):
                    device_distance[i,
                                    j] = device_distance[j,
                                                         i] = gpu_gpu_distance

        # optimize
        for _ in range(iterations):
            self.optimize_placement(
                optimizer,
                placement,
                model_size,
                size_capacity,
                model_complexity,
                complexity_capacity,
                model_conn,
                device_distance,
                connection_weight,
                size_match_weight,
                complexity_match_weight,
                entropy_weight,
            )
        self._assignment = [
            devices[d] for d in t.argmax(placement, dim=1).tolist()
        ]
        if move_models:
            for model, ass_device in zip(models, self._assignment):
                model.to(ass_device)
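
# Hedged usage sketch (not part of the original source): two toy models with a single
# connection between them, assigned across whatever CUDA devices GPUtil reports as
# available (falling back to CPU). `_assignment` is the attribute populated at the end
# of the constructor above.
#
#     import torch as t
#     import torch.nn as nn
#
#     models = [nn.Linear(256, 256), nn.Linear(256, 10)]
#     assigner = ModelAssigner(models, model_connection={(0, 1): 1.0})
#     print(assigner._assignment)   # e.g. [device(type='cuda', index=0), ...]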
Example #29
0
    parser = argparse.ArgumentParser()
    parser.add_argument('--points_batch', type=int, default=16384, help='point batch size')
    parser.add_argument('--nepoch', type=int, default=100000, help='number of epochs to train for')
    parser.add_argument('--conf', type=str, default='setup.conf')
    parser.add_argument('--expname', type=str, default='single_shape')
    parser.add_argument('--gpu', type=str, default='2', help='GPU to use [default: GPU auto]')
    parser.add_argument('--is_continue', default=False, action="store_true", help='continue')
    parser.add_argument('--timestamp', default='latest', type=str)
    parser.add_argument('--checkpoint', default='latest', type=str)
    parser.add_argument('--eval', default=False, action="store_true")

    args = parser.parse_args()

    if args.gpu == "auto":
        deviceIDs = GPUtil.getAvailable(order='memory', limit=1, maxLoad=0.5, maxMemory=0.5, includeNan=False, excludeID=[],
                                    excludeUUID=[])
        gpu = deviceIDs[0]
    else:
        gpu = args.gpu

    trainrunner = ReconstructionRunner(
            conf=args.conf,
            points_batch=args.points_batch,
            nepochs=args.nepoch,
            expname=args.expname,
            gpu_index=gpu,
            is_continue=args.is_continue,
            timestamp=args.timestamp,
            checkpoint=args.checkpoint,
            eval=args.eval
    )
Example #30
0
!pip install GPUtil

!echo $LD_LIBRARY_PATH
# !ls /usr/lib64-nvidia
!ls /usr/local/cuda/extras/CUPTI/lib64
# note: in a notebook, `!export` runs in a subshell and does not persist for later cells;
# %env LD_LIBRARY_PATH=... or os.environ would be needed instead
!export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64

from google.colab import drive
drive.mount('/content/drive')

import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# XXX: Colab exposes at most one GPU, and even that isn't guaranteed
gpu = GPUs[0]
def printm():
    process = psutil.Process(os.getpid())
    print("Gen RAM Free: " + humanize.naturalsize(psutil.virtual_memory().available),
          " | Proc size: " + humanize.naturalsize(process.memory_info().rss))
    print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(
        gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil * 100, gpu.memoryTotal))

printm()
!nvidia-smi
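
# GPUtil can also print a one-line per-GPU summary, a lighter-weight alternative to
# the printm() helper above:
GPU.showUtilization()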

import os
import pickle
import pandas as pd
import tensorflow as tf
from tensorflow import keras
Example #31
0
import os
import subprocess
import sys   # sys.exit() is used below but sys was not imported in the original snippet
import GPUtil


deviceIDs = GPUtil.getAvailable(order='first', limit=3, maxLoad=0.5, maxMemory=0.5)

print(','.join(str(e) for e in deviceIDs))

task_queue_file = os.path.join(os.environ.get("HOME", None), "task_queue.txt")

if not os.path.isfile(task_queue_file):
	sys.exit(0)

task_list = open(task_queue_file).readlines()

print(task_list)

for i in range(min(len(deviceIDs), len(task_list))):
	task = task_list[i].strip() + " --gpu-id=%d" % i
	print(task)
	subprocess.Popen(task, shell=True, cwd="/home/xyang22/project/research/active-learning-dnn")
Example #32
0
try:
    # Try to import cupy
    import cupy as cp
    import cupyx.scipy.linalg as cpxl
    # Try to access a device
    cp.cuda.Device(0).compute_capability
    # Flag indicating successful import
    have_cupy = True
    # Import appropriate versions of utility functions
    from ._cp_util import *
    try:
        # Try to import GPUtil
        import GPUtil
        # Check whether GPUtil is functional
        gpus = GPUtil.getGPUs()
        if gpus:
            have_gputil = True
        else:
            have_gputil = False
    except (ImportError, ValueError):
        have_gputil = False
    if have_gputil:
        from ._gputil import *
    else:
        from ._nogputil import *
except Exception:
    # If cupy import or device access fails, import numpy to the same alias
    import numpy as cp
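
# Hedged usage sketch (not part of the original module): because numpy is imported
# under the same alias when cupy or a GPU device is unavailable, downstream code
# written against `cp` runs unchanged on either backend, e.g.:
#
#     x = cp.ones(4)
#     print(float(cp.sqrt(x).sum()))   # 4.0 with numpy and with cupy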
Example #33
0
import os.path
import sys
sys.path.append(
    os.path.dirname(os.path.abspath(__file__)) + (os.path.sep + '..') * 2)

import numpy as np
import time, timeit

# is there a working GPU around ?
import GPUtil
try:
    gpu_available = len(GPUtil.getGPUs()) > 0
except Exception:  # any failure (e.g. missing nvidia-smi) means no usable GPU
    gpu_available = False

N = 1500
M = 300
D = 3
E = 3

# declare numpy
from pykeops.numpy.utils import differences, squared_distances, grad_np_kernel, chain_rules
a = np.random.rand(N, E).astype('float32')
x = np.random.rand(N, D).astype('float32')
y = np.random.rand(M, D).astype('float32')
b = np.random.rand(M, E).astype('float32')
sigma = np.array([0.4]).astype('float32')

# declare the torch counterpart
try:
    import torch
Example #34
0
def main(conf_name, gpu):
    # Initialize configs and prepare result dir with date

    if conf_name is None:
        conf = configs.Config()
    else:
        # The exec-based lookup below doesn't seem to work, so the known
        # configuration names are mapped explicitly instead.
        # conf = None
        # exec ('conf = configs.%s' % conf_name)
        # conf = configs.LIDAR_CONF
        if conf_name == "X2_REAL_CONF":
            conf = configs.X2_REAL_CONF
        elif conf_name == "X2_GRADUAL_IDEAL_CONF":
            conf = configs.X2_GRADUAL_IDEAL_CONF
        else:
            # fail fast instead of leaving `conf` unbound for unknown names
            raise ValueError('unknown configuration name: %s' % conf_name)

    res_dir = prepare_result_dir(conf)
    local_dir = os.path.dirname(__file__)

    # We take all png files that are not ground truth
    files = [
        file_path
        for file_path in glob.glob('%s/*.%s' %
                                   (conf.input_path, conf.input_file_ext))
        if not file_path[-7:-4] == '_gt'
    ]
    print("files", res_dir, local_dir)
    print(files)

    # Loop over all the files
    for file_ind, input_file in enumerate(files):

        # Ground-truth file needs to be like the input file with _gt (if exists)
        ground_truth_file = input_file[:-4] + '_gt.png'
        if not os.path.isfile(ground_truth_file):
            ground_truth_file = '0'

        # Numeric kernel files need to be like the input file with serial number
        kernel_files = [
            '%s_%d.mat;' % (input_file[:-4], ind)
            for ind in range(len(conf.scale_factors))
        ]
        kernel_files_str = ''.join(kernel_files)
        for kernel_file in kernel_files:
            if not os.path.isfile(kernel_file[:-1]):
                kernel_files_str = '0'
                print('no kernel loaded')
                break

        print(kernel_files)

        # This option uses all the gpu resources efficiently
        if gpu == 'all':

            # Stay in this loop until some GPU becomes available with at least half of its load and memory free
            gpus = []
            while not gpus:
                gpus = GPUtil.getAvailable(order='memory')

            # Take the gpu with the most free memory
            cur_gpu = gpus[-1]

            # Run ZSSR from command line, open xterm for each run
            os.system(
                "xterm -hold -e " + conf.python_path +
                " %s/run_ZSSR_single_input.py '%s' '%s' '%s' '%s' '%s' '%s' alias python &"
                % (local_dir, input_file, ground_truth_file, kernel_files_str,
                   cur_gpu, conf_name, res_dir))

            # Verbose
            print('Ran file #%d: %s on GPU %d\n' %
                  (file_ind, input_file, cur_gpu))

            # Wait 5 seconds for the previous process to start using the GPU. Without the wait, its GPU memory
            # would not yet be allocated, so all processes would start on the same GPU at once and later crash.
            sleep(5)

        # The other option is just to run sequentially on a chosen GPU.
        else:
            run_ZSSR_single_input.main(input_file, ground_truth_file,
                                       kernel_files_str, gpu, conf, res_dir)
Example #35
0
    def run(self):
        def get_a_worker():
            return self.workers.pop(0)

        def free_a_worker(w):
            self.workers.append(w)

        def register_job(c, num_part=1):
            job_checksum[c] = num_part
            finish_jobs[c] = []

        def unregister_job(c):
            job_checksum.pop(c)
            finish_jobs.pop(c)

        self.context = zmq.Context.instance()
        self.frontend = self.context.socket(zmq.ROUTER)
        self.frontend.bind('tcp://*:%d' % self.port)
        self.backend = self.context.socket(zmq.ROUTER)
        self.backend.bind('ipc:///tmp/bert.service')

        available_gpus = range(self.num_worker)
        try:
            import GPUtil
            available_gpus = GPUtil.getAvailable(limit=self.num_worker)
            if len(available_gpus) < self.num_worker:
                logger.warning('only %d GPU(s) is available, but ask for %d' %
                               (len(available_gpus), self.num_worker))
        except FileNotFoundError:
            logger.warning(
                'nvidia-smi is missing, which usually means no GPU was found on '
                'this machine; will run the service on CPU instead')

        for i in available_gpus:
            process = BertWorker(i, self.args)
            self.processes.append(process)
            process.start()

        poller = zmq.Poller()
        # Only poll for requests from backend until workers are available
        poller.register(self.backend, zmq.POLLIN)

        job_queue, finish_jobs, job_checksum = [], {}, {}

        while True:
            sockets = dict(poller.poll(2))

            if self.backend in sockets:
                msg = self.backend.recv_multipart()
                worker, _, client = msg[:3]
                free_a_worker(worker)
                if client != b'READY' and len(msg) > 3:
                    arr_info, arr_val = jsonapi.loads(msg[4]), msg[7]
                    X = np.frombuffer(memoryview(arr_val),
                                      dtype=arr_info['dtype'])
                    finish_jobs[client].append(X.reshape(arr_info['shape']))
                else:
                    poller.register(self.frontend, zmq.POLLIN)

                # check if there are finished jobs, send it back to workers
                finished = [(k, v) for k, v in finish_jobs.items()
                            if len(v) == job_checksum[k]]
                for client, tmp in finished:
                    send_ndarray(self.frontend, client,
                                 np.concatenate(tmp, axis=0))
                    unregister_job(client)

            if self.frontend in sockets:
                client, _, msg = self.frontend.recv_multipart()
                if msg == b'SHOW_CONFIG':
                    self.frontend.send_multipart([
                        client, b'',
                        jsonapi.dumps({
                            **{
                                'client': client.decode('ascii')
                            },
                            **self.args_dict
                        })
                    ])
                    continue

                seqs = pickle.loads(msg)
                num_seqs = len(seqs)

                if num_seqs > self.max_batch_size:
                    # divide the large batch into small batches
                    s_idx = 0
                    n = 0
                    while s_idx < num_seqs:
                        tmp = seqs[s_idx:(s_idx + self.max_batch_size)]
                        if tmp:
                            job_queue.append(
                                (client, pickle.dumps(tmp, protocol=-1)))
                            n += 1
                        s_idx += len(tmp)
                    register_job(client, num_part=n)
                else:
                    register_job(client)
                    job_queue.append((client, msg))

            # non-empty job queue and free workers, pop the last one and send it to a worker
            while self.workers and job_queue:
                client, tmp = job_queue.pop()
                worker = get_a_worker()
                self.backend.send_multipart([worker, b'', client, b'', tmp])
                logger.info(
                    'available workers: %2d\tjob queue: %3d\tpending clients: %3d'
                    % (len(self.workers), len(job_queue), len(job_checksum)))
Example #36
0
import os   # os.environ and os.path are used below but os was not imported in the original snippet
import GPUtil
import subprocess
from sklearn.cluster import DBSCAN
import MDAnalysis as mda
from MDAnalysis.analysis.rms import RMSD

from utils import start_rabbit, start_worker, start_flower_monitor, read_h5py_file, cm_to_cvae, job_on_gpu
from utils import find_frame, write_pdb_frame, make_dir_p, job_list, outliers_from_latent, predict_from_cvae
from utils import omm_job, cvae_job

from CVAE import CVAE

# n_gpus = 16
# number of cvae jobs, starting from hyper_dim 3
n_cvae = 4
GPU_ids = [gpu.id for gpu in GPUtil.getGPUs()]
print('Available GPUs', GPU_ids)

os.environ["RABBITMQ_MNESIA_BASE"] = "~/.rabbit_base"
os.environ["RABBITMQ_LOG_BASE"] = "~/.rabbit_base/"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# top_file = os.path.abspath('../P27-all/C1B48/C1B48.top.gz')
# pdb_file = os.path.abspath('../P27-all/C1B48/C1B48.pdb.gz')
top_file = None
pdb_file = os.path.abspath('./pdb/100-fs-peptide-400K.pdb')
ref_pdb_file = os.path.abspath('./pdb/fs-peptide.pdb')

work_dir = os.path.abspath('./')

# create folders for storing results
Example #37
0
        return new_hidden_state

    def forward(self, x):
        x = torch.transpose(x, 0, 1)
        output, (final_hidden_state, final_cell_state) = self.encoder(x, None)

        attn_output = self.attention_net(output, output[-1])
        attn_output = self.dropout(attn_output)
        fc_output = self.fc1(attn_output)
        output = self.softmax(fc_output)  # output layer using softmax function
        return output


print("model done")
if cuda_gpu:
    device_ids = GPUtil.getAvailable(limit=4)
    print(device_ids)
    if torch.cuda.device_count() == 1:
        lstmattn = AttentionLSTM(embedding_dim, hidden_dim, num_layers,
                                 output_size, dropout).cuda()
    else:
        torch.backends.cudnn.benchmark = True
        lstmattn = AttentionLSTM(embedding_dim, hidden_dim, num_layers,
                                 output_size, dropout).cuda(device_ids[0])
        lstmattn = nn.DataParallel(lstmattn, device_ids=device_ids)
else:
    lstmattn = AttentionLSTM(embedding_dim, hidden_dim, num_layers,
                             output_size, dropout)
print(lstmattn)

tokenizer = ''