Example #1
def fs_sharing():
    prev_strategy = mp.get_sharing_strategy()
    mp.set_sharing_strategy('file_system')
    try:
        yield
    finally:
        mp.set_sharing_strategy(prev_strategy)
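The snippet above is written as a generator; to behave as a context manager it presumably carries contextlib's contextmanager decorator, which the excerpt omits. A minimal runnable sketch under that assumption:

from contextlib import contextmanager
import torch.multiprocessing as mp

@contextmanager
def fs_sharing():
    # Temporarily switch tensor sharing to 'file_system', restoring the
    # previous strategy when the with-block exits.
    prev_strategy = mp.get_sharing_strategy()
    mp.set_sharing_strategy('file_system')
    try:
        yield
    finally:
        mp.set_sharing_strategy(prev_strategy)

# Usage: tensors produced inside the block are shared via the file system.
with fs_sharing():
    pass  # e.g. iterate a DataLoader with num_workers > 0 here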
Example #2
def fs_sharing():
    prev_strategy = multiprocessing.get_sharing_strategy()
    multiprocessing.set_sharing_strategy('file_system')
    try:
        yield
    finally:
        multiprocessing.set_sharing_strategy(prev_strategy)
Example #3
def read_data(dataset: Union[Video_2D_Inference, Video_3D_Inference],
              batch_size: int, num_worker: int, data_queue: mp.Queue):
    mp.set_sharing_strategy('file_system')
    for item in DataLoader(dataset,
                           batch_size=batch_size,
                           num_workers=num_worker):
        data_queue.put(item)
Example #4
def fs_sharing():
    prev_strategy = mp.get_sharing_strategy()
    mp.set_sharing_strategy('file_system')
    try:
        yield
    finally:
        mp.set_sharing_strategy(prev_strategy)
Example #5
def main():

    print("{:=^100}".format(' Test '))
    print("run parameters: {}".format(sys.argv))

    import torch.multiprocessing as mp
    mp.set_sharing_strategy("file_system")  #is this necessary?ss

    # if output path does not exist, create it
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    model = MPNN(T=args.T, p=args.p, target=args.target, output_type=args.output_type, output_dim=args.output_dim,
                 readout_dim=args.readout_dim)
    model.load_state_dict(torch.load(args.model_path))
    model.eval()

    processes = []
    for rank in range(args.n_test_process):
        p = mp.Process(target=test, args=(rank, model))
        p.start()
        processes.append(p)

    print("joining {} processes.".format(len(processes)))

    for p in processes:
        p.join()
Example #6
def run_spawn(config):
    world_size = config.world_size
    master_address = config.federator_host
    nic = config.nic
    mp.set_sharing_strategy("file_system")
    mp.set_start_method("spawn", True)
    mp.spawn(run_single,
             args=(world_size, master_address, config, nic),
             nprocs=world_size,
             join=True)
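torch.multiprocessing.spawn prepends the process index to the supplied args, so run_single presumably accepts a signature along these lines (a hedged sketch; the body is hypothetical):

def run_single(rank, world_size, master_address, config, nic):
    # rank in [0, world_size) is injected by mp.spawn; the remaining
    # arguments come from the args tuple passed to mp.spawn above.
    pass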
Example #7
    def full_run(self, n_runs_per_device, n_processes_per_device,
                 devices_list, epochs_per_simulation: Union[int, List[int]]):
        """ Runs the experiment with multiple seeds, distributing the
        simulations across different devices and saving the results to disk.
        """
        if isinstance(epochs_per_simulation, int):
            epochs_per_simulation = [epochs_per_simulation]

        multiprocessing.set_sharing_strategy('file_system')
        context = multiprocessing.get_context('spawn')

        experiment_id = 0
        for simulation_factory, epochs in zip(self.simulation_factories,
                                              cycle(epochs_per_simulation)):
            # Create a pool for each device.
            pools = []
            for device in devices_list:
                pools.append(context.Pool(processes=n_processes_per_device))

            # For each pool execute jobs.
            results = []
            for pool_id, (pool, device) in enumerate(zip(pools, devices_list)):
                args = []
                for i in range(n_runs_per_device):
                    args.append((
                        simulation_factory,
                        pool_id * n_runs_per_device + i,  # seed,
                        device,
                        epochs))
                results.append(pool.starmap_async(_job, args))

            # Save all runs of the current simulation configuration to disk.
            all_outputs = []
            for pool, result in zip(pools, results):
                all_outputs += result.get()
                pool.close()
                pool.join()

            # Now process all the outputs to save only what we need.
            processed_outputs = []
            for simulation in all_outputs:
                simulation_history, used_seed, used_device = simulation
                processed_output = self.handle_simulation_output(
                    simulation_history)
                processed_outputs.append(
                    (processed_output, used_seed, used_device))

            # We create a new simulation object to get an identifier.
            simulation_identifier = self.construct_simulation_identifier(
                simulation_factory(0, torch.device('cpu')))
            # Write processed_outputs to disk.
            file_path = _outputs_prefix + self.name + '/experiment_' + \
                str(experiment_id)
            save_to_disk((simulation_identifier, processed_outputs), file_path)
            experiment_id += 1
Example #8
def run_in_parallel(times_per_device,
                    n_processes_per_device,
                    simulation_parameters,
                    pytorch_configs,
                    process_initialiser=None,
                    initialiser_args=()):
    """ Executes the set up experiment on the given devices.
        Parameters:

        times_per_device The number of times the given experiment has to be
                repeated on each device.
        n_processes_per_device The number of independent processes to be used
                for each pytorch_config.
        simulation_parameters An instance of subclass of SimulationParameters
                class.
        observers_factory A factory method for the required observers.
        pytorch_configs A list of PyTorchConfig objects specifying the devices
                and data types to be used. For example, this list could contain
                two gpus, then, setting n_processes = 2 (or more) both gpus
                will be used to execute the simulations.
        process_initialiser Each process will be initialised by calling this
                function.
        initialiser_args Arguments to be passed to the initialiser
                function. This parameter must be a list, providing the
                arguments for each different device. """

    multiprocessing.set_sharing_strategy('file_system')
    context = multiprocessing.get_context('spawn')

    pools = []

    for config, initargs in \
            itertools.zip_longest(pytorch_configs, initialiser_args):
        pools.append(
            context.Pool(processes=n_processes_per_device,
                         initializer=process_initialiser,
                         initargs=initargs))

    results = []
    for pool_id, (pool, config) in enumerate(zip(pools, pytorch_configs)):
        args = []
        for i in range(times_per_device):
            args.append((pool_id * times_per_device + i, simulation_parameters,
                         config))
        results.append(pool.starmap_async(job, args))

    all_outputs = []
    for pool, result in zip(pools, results):
        all_outputs += result.get()
        pool.close()
        pool.join()

    return _process_all_outputs(all_outputs)
Example #9
def main():

    import torch.multiprocessing as mp
    mp.set_sharing_strategy("file_system")
    mp = mp.get_context("forkserver")
    from src.model import MPNN

    torch.manual_seed(args.seed)

    print("{:=^100}".format(' Train '))
    print("experiment: {}".format(args.exp_name))
    print("run parameters: {} \n".format(sys.argv))

    model_path = args.model_path

    print("instantiating model...")
    model = MPNN(T=args.T,
                 p=args.p,
                 target=args.target,
                 output_type=args.output_type,
                 output_dim=args.output_dim,
                 readout_dim=args.readout_dim)
    if model_path is not None:
        model.load_state_dict(torch.load(model_path))

    model.share_memory()

    print(model)

    # Train the model
    print("Training Model...")

    processes = []
    for rank in range(args.n_train_process):
        p = mp.Process(target=train, args=(rank, args, model))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()

    print("Finished training model")
Example #10
def log(args):
    ''' Folder settings when saving training results'''
    if not os.path.exists('result') and not args.debug:
        os.makedirs('result')
    if not os.path.exists('result/' + args.info) and not args.debug:
        os.mkdir('result/' + args.info)
    if not os.path.exists('result/' + args.info + '/img') and not args.debug:
        os.mkdir('result/' + args.info + '/img')
    if not os.path.exists('result/' + args.info + '/scripts') and not args.debug:
        os.mkdir('result/' + args.info + '/scripts')
    if not os.path.exists('result/' + args.info + '/ckp') and not args.debug:
        os.mkdir('result/' + args.info + '/ckp')



    print('[*] Info:', time.ctime())
    print('[*] Info:', os.path.basename(__file__))

    # if ~args.debug and args.log == True and args.resume == False:
    if not args.debug and not args.resume:
        from shutil import copyfile
        copyfile(os.path.basename(__file__), 'result/' + args.info + '/scripts/' + os.path.basename(__file__))
        copyfile('config.py', 'result/' + args.info + '/scripts/config.py')
        copyfile('head.py', 'result/' + args.info + '/scripts/head.py')
        copyfile('train.py', 'result/' + args.info + '/scripts/train.py')
        copyfile('test.py', 'result/' + args.info + '/scripts/test.py')
        copytree('./data_loader/', 'result/' + args.info + '/scripts/data_loader')
        copytree('./model/', 'result/' + args.info + '/scripts/model')
        copytree('./utils/', 'result/' + args.info + '/scripts/utils')


    sys.stdout = Unbuffered(sys.stdout)
    torch.cuda.set_device(args.gpu_idx)

    from torch import multiprocessing
    multiprocessing.set_sharing_strategy('file_system')
    torch.set_num_threads(1)
Example #11
def main(data_dir: str, save_dir: str, segment: int):
    mp.set_sharing_strategy("file_system")
    os.makedirs(save_dir, exist_ok=True)
    wav2mel = Wav2Mel()
    file2mel = partial(process_files, wav2mel=wav2mel)

    meta_data = {}
    speakers = sorted(os.listdir(data_dir))

    for spk in tqdm(speakers):
        spk_dir = os.path.join(data_dir, spk)
        wav_files = librosa.util.find_files(spk_dir)
        mels = [file2mel(wav_file) for wav_file in wav_files]
        mels = list(
            filter(lambda x: x is not None and x.shape[-1] > segment, mels))
        rnd_paths = [f"{uuid4().hex}.pt" for _ in range(len(mels))]
        for mel, path in zip(mels, rnd_paths):
            torch.save(mel, os.path.join(save_dir, path))
        meta_data[spk] = rnd_paths

    with open(os.path.join(save_dir, "metadata.json"), "w") as f:
        json.dump(meta_data, f, indent=4)
Example #12
def set_sharing_strategy(new_strategy=None):
    """
    https://pytorch.org/docs/stable/multiprocessing.html
    https://discuss.pytorch.org/t/how-does-one-setp-up-the-set-sharing-strategy-strategy-for-multiprocessing/113302
    https://stackoverflow.com/questions/66426199/how-does-one-setup-the-set-sharing-strategy-strategy-for-multiprocessing-in-pyto
    """
    from sys import platform

    if new_strategy is not None:
        mp.set_sharing_strategy(new_strategy=new_strategy)
    else:
        if platform == 'darwin':  # OS X
            # the only sharing strategy available on OS X
            mp.set_sharing_strategy('file_system')
        else:
            # ulimit -n 32767 or ulimit -n unlimited (perhaps later do try catch to execute this increase fd limit)
            mp.set_sharing_strategy('file_descriptor')
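The "ulimit -n" note above can also be applied from inside the process with the standard resource module (Unix only); a minimal sketch, where 32767 simply mirrors the comment:

import resource

# Raise the soft file-descriptor limit before relying on the
# 'file_descriptor' sharing strategy.
soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
new_soft = 32767 if hard == resource.RLIM_INFINITY else min(32767, hard)
resource.setrlimit(resource.RLIMIT_NOFILE, (new_soft, hard))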
Example #13
def actor(rank, args, T, BEST, memory_queue, model_queue, p2):
    mp.set_sharing_strategy('file_system')
    # rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    # resource.setrlimit(resource.RLIMIT_NOFILE, (args.nofile, rlimit[1]))
    torch.manual_seed(args.seed + rank)
    print("Process {} fighting with {}".format(rank, p2))
    env = gym.make(args.env,
                   java_env_path="..",
                   port=args.port + rank * 2,
                   p2=p2)
    env.seed(args.seed + rank)
    model = ActorCritic(env.observation_space, env.action_space,
                        args.hidden_size)
    shared_average_model = ActorCritic(env.observation_space, env.action_space,
                                       args.hidden_size)
    memory = EpisodicReplayMemory(args.num_processes, args.max_episode_length)

    t = 1  # Thread step counter
    done = True  # Start new episode

    while T.value() <= args.T_max:
        # Actor loop
        t_value = T.value()
        discard = False
        round_score = 0
        episode_length = 0
        sum_entropy = 0
        if not model_queue.empty():
            print("Process {} going to load new model at EPISODE {}......".
                  format(rank, t_value))
            received_obj = model_queue.get()
            model_dict, average_model_dict = copy.deepcopy(received_obj)
            model.load_state_dict(model_dict)
            shared_average_model.load_state_dict(average_model_dict)
            print("Process {} finished loading new mode at EPISODE {}!!!!!!".
                  format(rank, t_value))
            del received_obj

        # Reset or pass on hidden state
        if done:
            hx, avg_hx = torch.zeros(1, args.hidden_size), torch.zeros(
                1, args.hidden_size)
            cx, avg_cx = torch.zeros(1, args.hidden_size), torch.zeros(
                1, args.hidden_size)
            # Reset environment and done flag
            try:
                with timeout(seconds=30):
                    s = env.reset()
            except TimeoutError:
                print("Time out to reset env")
                env.close()
                continue
            state = state_to_tensor(s)
            action_mask = [[False for _ in range(56)]]
            action_mask = torch.BoolTensor(action_mask)
            done = False
        else:
            # Perform truncated backpropagation-through-time (allows freeing buffers after backwards call)
            hx = hx.detach()
            cx = cx.detach()

        # Lists of outputs for training
        policies, Qs, Vs, actions, rewards, average_policies = [], [], [], [], [], []

        while not done:
            # Calculate policy and values
            policy, Q, V, (hx, cx) = model(state, (hx, cx), action_mask)
            average_policy, _, _, (avg_hx, avg_cx) = shared_average_model(
                state, (avg_hx, avg_cx), action_mask)

            # Sample action
            action = torch.multinomial(policy, 1)[0, 0]
            sum_entropy += Categorical(probs=policy.detach()).entropy()

            # Step
            next_state, reward, done, info = env.step(action.item())
            valid_actions = info.get('my_action_enough', {})
            # get valid actions
            if len(valid_actions) > 0:
                action_mask = [[
                    False if i in valid_actions else True for i in range(56)
                ]]
            else:
                action_mask = [[False for _ in range(56)]]
            action_mask = torch.BoolTensor(action_mask)
            round_score += reward
            if info.get('no_data_receive', False):
                env.close()
                discard = True
                memory.append_transition(state,
                                         None,
                                         None,
                                         None,
                                         action_mask.detach(),
                                         discard=discard)
                break
            next_state = state_to_tensor(next_state)
            reward = args.reward_clip and min(max(
                reward, -1), 1) or reward  # Optionally clamp rewards

            # Save (beginning part of) transition for offline training
            memory.append_transition(state, action, reward, policy.detach(),
                                     action_mask.detach())  # Save just tensors
            for arr, el in zip((policies, Qs, Vs, actions, rewards,
                                average_policies),
                               (policy, Q, V, torch.LongTensor([[action]]),
                                torch.Tensor([[reward]]), average_policy)):
                arr.append(el)

            # Increment counters
            t += 1
            episode_length += 1  # Increase episode counter

            # Update state
            state = next_state

        if discard:
            done = True
            continue

        # Finish on-policy episode
        # No need to increment T in the actor
        # T.increment()
        print(
            """Process: {}, EPISODE: {},BEST: {}, episode: {}, round_reward: {}"""
            .format(rank, t_value, BEST.value(), t, round_score))

        # Save terminal state for offline training
        memory.append_transition(state, None, None, None, action_mask.detach())
        last_trajectory = copy.deepcopy(memory.last_trajectory())
        on_policy_data = (last_trajectory, (episode_length, round_score,
                                            sum_entropy / episode_length))
        send_object = copy.deepcopy(on_policy_data)
        memory_queue.put(send_object, )
        print("Process {} send trajectory".format(rank))
        # TODO: add TD error of the trajectory as the priority
        done = True
    env.close()
Example #14
import torch.multiprocessing as mp
from torch.multiprocessing import set_sharing_strategy, set_start_method
try:
    set_start_method('spawn')
    set_sharing_strategy("file_descriptor")
except RuntimeError:
    pass
import torch
import os, sys, pdb
from eval_env import FileEnv
from utils import log

done = mp.Event()


class ParallelSampler:
    """
    Manages multithreaded sampling from a FileEnv
    """
    NUM_WORKERS = 1

    def __init__(self, file_env_args, tac_template, agent, train=False):
        "sampler"
        self.tac_template = tac_template
        self.file_env_args = file_env_args
        self.agent = agent
        self.train = train
        agent.model.share_memory()
        # os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    def sample_trajectories(self, n_epochs=1, **kwargs):
Example #15
no limitation of timestep one batch


1. separate ProcessUnit to preprocess.py
2. add shared noise table
'''
import os
import click
import gym
import torch
import time
import pickle
import logging
import numpy as np
import torch.multiprocessing as mp
mp.set_sharing_strategy('file_system')

from config import N_POPULATION, N_GENERATION, LR, SIGMA, TIMESTEP_LIMIT, reference_batch_size
from optimizer import SGD
from train import train, test, explore_for_vbn

torch.set_num_threads(1)
LogFolder = os.path.join(os.getcwd(), 'log')
model_storage_path = '/home/yyl/model/es-rl/'

Small_value = -1000000


def setup_logging(logfile):
    if logfile == 'default.log':
        timenow = time.localtime(time.time())
Example #16
def detection_by_tracking(
        frame_dir,
        json_file,
        tracker_model,
        detection_threshold=0.9,
        tracking_threshold=0.9,
        save_json_file="data/demo_tracking/detection_by_tracking.tracking_json",
        offset=0,
        low=None,
        high=None,
        step=1,
        parallel=False,
        multithreading=False):
    # Load annotations
    data = json.load(open(json_file, "r"))

    annotations = dict()
    for annotation in data['annotations']:
        if annotation['image_id'] in annotations:
            annotations[annotation['image_id']] += [annotation]
        else:
            annotations[annotation['image_id']] = [annotation]

    # Load frames
    frame_files = general_utils.get_all_files(frame_dir,
                                              keep_dir=True,
                                              sort=True)
    num_frame = len(frame_files)

    tracking_data = dict()
    tracking_data["images"] = data["images"]
    tracking_data["categories"] = data["categories"]
    tracking_data["annotations"] = list()

    if low is None:
        low = -int(1e9)

    if high is None:
        high = int(1e9)

    start = time.time()
    last_count = 0

    # Set up parallel processing
    if parallel:
        mp.set_start_method('spawn', force=True)
        mp.set_sharing_strategy('file_system')

        pool = Pool()
    else:
        pool = None
    results = [None for _ in range(num_frame)]

    # Set up multithreading processing
    if multithreading:
        executor = ThreadPoolExecutor()
    else:
        executor = None

    # Loop over frames
    for frame_id in range(num_frame):
        # Align id
        frame_id += offset

        num_box = len(annotations[frame_id])

        # Count boxes with high confidence
        count = 0
        for box_id in range(num_box):
            score = annotations[frame_id][box_id]["score"]
            if score > detection_threshold:
                count += 1

        # If this frame has more boxes, track from it for certain; else check skip criteria
        if count <= last_count:
            last_count = count

            # Skip frame
            if frame_id % step != 0:
                continue
        else:
            last_count = count

        print("Process frame ", frame_id)

        forward_tracker = build_tracker(tracker_model)
        backward_tracker = build_tracker(tracker_model)

        # Loop over detection boxes
        for box_id in range(num_box):
            # print("=> Process box ", box_id)

            # Filter by detection score
            score = annotations[frame_id][box_id]["score"]
            if score < detection_threshold:
                # print("==> Skip")
                continue

            if multithreading:
                print(
                    f"---> Multithread tracking for box {box_id} frame {frame_id}"
                )
                executor.submit(single_box_in_single_frame_tracking,
                                frame_files, frame_id, box_id, annotations,
                                tracking_threshold, forward_tracker,
                                backward_tracker, offset, low, high)

            if parallel:
                print(
                    f"---> Parallel tracking for box {box_id} frame {frame_id}"
                )
                results[frame_id - offset] = pool.apply_async(
                    single_box_in_single_frame_tracking, [
                        frame_files, frame_id, box_id, annotations,
                        tracking_threshold, forward_tracker, backward_tracker,
                        offset, low, high
                    ])

            if not multithreading and not parallel:
                tracking_data[
                    "annotations"] += single_box_in_single_frame_tracking(
                        frame_files, frame_id, box_id, annotations,
                        tracking_threshold, forward_tracker, backward_tracker,
                        offset, low, high)

    for result in results:
        if result is not None:
            tracking_data["annotations"] += result.get()

    end = time.time()
    print(f"Total time: {(end - start)} s")

    with open(save_json_file, "w") as outfile:
        json.dump(tracking_data, outfile)
Example #17
from caffe2.torch.fb.distributed.model_parallel.share_memory import (
    ShareMemoryRPCPickler, )
from caffe2.torch.fb.distributed.pytorch.adagrad_jit import (
    Adagrad as FunctionalAdagrad,
    RowWiseSparseAdagrad,
)
from caffe2.torch.fb.training_toolkit.backend.data.dpp_session import DppSession
from torch import multiprocessing, nn
from torch.distributed import rpc
from torch.distributed.rpc.api import _use_rpc_pickler
from torch.nn import functional as F

from .iteration_controller import IterationControllerFactory

# only support "file_system". See comments in comm.ShareMemory for detail
multiprocessing.set_sharing_strategy("file_system")
_BATCH_COUNT_PER_PRINT = 100


class Trainer:
    r"""
    Multi threading Hogwild trainer with EASGD and DPP
    """
    def __init__(
        self,
        model: nn.Module,
        ea_client: ElasticAveragingClient,
        use_multithread_hogwild: bool,
        hogwild_workers_names: List[str],
        iteration_controller_factory: IterationControllerFactory,
        loss_fn: Optional[torch.jit.ScriptModule] = None,
Example #18
from typing import Dict, Tuple, List

#
# Multiprocess input pipeline
# -------------------------------
#
# single epoch batch generators with multiple subprocesses, each subprocess works on its own file until the file is parsed completely
#
# - the processes have as little communication as possible (because it is prohibitively expensive in python)
# - the finished batches go into shared memory and then the queue to be picked up by the train/validation loops
#

mp.get_logger().setLevel(
    logging.WARNING)  # ignore useless process start console logs
mp.set_sharing_strategy(
    "file_system"
)  # VERY MUCH needed for linux !! makes everything MUCH faster -> from 10 to 30+ batches/s

fasttext_vocab_cached_mapping = None
fasttext_vocab_cached_data = None


#
# we need to wrap the individual process queues, because they might be filled in different order
# now we make sure to always get the same training samples in the same order for all runs
#
class DeterministicQueue():
    def __init__(self, distributed_queues):
        self.distributed_queues = distributed_queues
        self.num_queues = len(distributed_queues)
        self.current_idx = 0
Example #19
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--DescEmb',
                        action='store_true',
                        help='True if DescEmb, False if CodeEmb')
    parser.add_argument('--source_file',
                        choices=['mimic', 'eicu', 'both'],
                        type=str,
                        default='mimic',
                        help='both for pooling')
    parser.add_argument('--target',
                        choices=[
                            'readmission', 'mortality', 'los>3day', 'los>7day',
                            'dx_depth1_unique'
                        ],
                        type=str,
                        default='readmission')
    parser.add_argument('--item', choices=['all'], type=str, default='lab')
    parser.add_argument('--time_window',
                        choices=['12', '24', '36', '48', 'Total'],
                        type=str,
                        default='12')
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--dropout', type=float, default=0.3)
    parser.add_argument('--embedding_dim', type=int, default=128)
    parser.add_argument('--hidden_dim', type=int, default=256)
    parser.add_argument('--n_epochs', type=int, default=1000)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--max_length', type=str, default='150')
    parser.add_argument('--bert_model',
                        choices=[
                            'bert', 'bio_clinical_bert', 'bio_bert',
                            'pubmed_bert', 'blue_bert', 'bert_mini',
                            'bert_tiny', 'bert_small'
                        ],
                        type=str)
    parser.add_argument('--cls_freeze', action='store_true')
    parser.add_argument('--input_path',
                        type=str,
                        default='/home/jylee/data/pretrained_ehr/input_data/',
                        help='data directory')
    parser.add_argument(
        '--path',
        type=str,
        default='/home/jylee/data/pretrained_ehr/output/KDD_output/',
        help='model saving directory')
    args = parser.parse_args()

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    if args.DescEmb:
        from dataset.DescEmb_dataloader import DescEmb_get_dataloader as get_dataloader
        from trainer.DescEmb_trainer import DescEmb_Trainer as Trainer
        if args.cls_freeze:
            print('DescEmb-FR')
        else:
            print('DescEmb-FT')

    elif not args.DescEmb:
        from dataset.CodeEmb_dataloader import CodeEmb_get_dataloader as get_dataloader
        from trainer.CodeEmb_trainer import Trainer
        print('CodeEmb')

    mp.set_sharing_strategy('file_system')

    SEED = [2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029]

    for seed in SEED:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # if use multi-GPU
        torch.backends.cudnn.deterministic = True

        args.seed = seed
        print('seed_number', args.seed)

        train_loader = get_dataloader(args=args, data_type='train')

        trainer = Trainer(args, train_loader, device)
        trainer.train()

        print('Finished training seed: {}'.format(seed))
Example #20
import torch.multiprocessing as mp

mp.set_sharing_strategy('file_system')  # otherwise, weird bug
import numpy as np
import time
import torch
import os
from torch.utils.data import Dataset, DataLoader
from librosa.core import load as loadwav
import json
import pickle
from tqdm import tqdm
import pdb
from scattering_autoencoder.scattering_recurrent import RecurrentScatteringNP


def get_files_timit(path, **kwargs):
    """
    Explores the TIMIT folder to retrieve all wav addresses
    """
    all_files = {}
    regions = os.listdir(path)
    for id_region in range(len(regions)):
        speakers = os.listdir(os.path.join(path, regions[id_region]))
        for id_speaker in range(len(speakers)):
            subdir = os.path.join(path, regions[id_region],
                                  speakers[id_speaker])
            files = [f for f in os.listdir(subdir) if '.WAV' in f]
            for f in files:
                prefix = str.split(f, '.')[0]
                key = (regions[id_region], speakers[id_speaker], prefix)
Example #21
import torch.nn.functional as F
import torch
#DIST
import torch.distributed as dist
import torch.multiprocessing as multiprocessing
from torch.multiprocessing import Process
from datasets import DatasetManager
from fid_score import *
from inception import *
from time import sleep, time
import random
import sys
from scipy import stats
from queue import Queue

multiprocessing.set_sharing_strategy('file_system')
torch.autograd.set_detect_anomaly(True)
# Set random seed for reproducibility
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
random.seed(manualSeed)


def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)
Example #22
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--DescEmb', action='store_true')
    parser.add_argument('--source_file',
                        choices=['mimic', 'eicu', 'both'],
                        type=str)
    parser.add_argument('--few_shot',
                        choices=[0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
                        type=float)  # training_dataset_size ratio
    parser.add_argument('--target',
                        choices=[
                            'readmission', 'mortality', 'los>3day', 'los>7day',
                            'dx_depth1_unique'
                        ],
                        type=str)
    parser.add_argument('--item', choices=['all'], type=str)
    parser.add_argument('--max_length', type=str, default='150')
    parser.add_argument('--bert_model',
                        choices=[
                            'bio_clinical_bert', 'bio_bert', 'pubmed_bert',
                            'blue_bert', 'bert', 'bert_mini', 'bert_small'
                        ],
                        type=str)
    parser.add_argument(
        '--path',
        type=str,
        default='/home/jylee/data/pretrained_ehr/output/KDD_output2/')
    parser.add_argument('--cls_freeze', action='store_true')
    parser.add_argument('--input_path',
                        type=str,
                        default='/home/jylee/data/pretrained_ehr/input_data/')
    args = parser.parse_args()

    args.time_window = '12'
    args.rnn_model_type = 'gru'
    args.batch_size = 512
    args.n_epochs = 1000
    # hyperparameter tuning
    args.dropout = 0.3
    args.embedding_dim = 128
    args.hidden_dim = 256
    args.lr = 1e-4

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    mp.set_sharing_strategy('file_system')

    SEED = [2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029]

    for seed in SEED:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

        args.seed = seed

        train_loader = get_dataloader(args=args, data_type='train')

        Trainer = DataSize_Trainer(args, train_loader, device)

        Trainer.train()
Example #23
def embed(n_epochs, eval_every, gpu, train_threads, sparse, tensorboard_dir,
          embed_manifold_name, embed_manifold_dim, embed_manifold_params,
          loss_params, conformal_loss_params, sample_neighbors_every,
          resume_training, model, _log):
    model_ingredient_data = model
    device = torch.device(f'cuda:{gpu}' if gpu >= 0 else 'cpu')
    torch.set_num_threads(1)

    logging_thread.initialize(tensorboard_dir, _log)

    curvature_scale = [
        torch.nn.Parameter(torch.tensor(0.)),
        torch.nn.Parameter(torch.tensor(0.)),
        torch.tensor(0., requires_grad=False)
    ]
    embed_manifold_params = embed_manifold_params.copy()
    embed_manifold_params["curvature_scale"] = curvature_scale
    embed_manifold = RiemannianManifold.from_name_params(
        embed_manifold_name, embed_manifold_params)
    tensorboard_watch = {
        "hyper_scale": curvature_scale[0],
        "sphere_scale": curvature_scale[1]
    }
    data, eval_data = load_dataset(embed_manifold)
    embed_eval.initialize_eval(adjacent_list=get_adjacency_dict(data))
    if resume_training:
        model, save_data = load_model()
        model.to(device)
        if "features" in save_data:
            model = FeaturizedModelEmbedding(model,
                                             data.features,
                                             save_data["in_manifold"],
                                             embed_manifold,
                                             embed_manifold_dim,
                                             device=device)
    else:
        model = gen_model(data, device, embed_manifold, embed_manifold_dim)

    if train_threads > 1:
        mp.set_sharing_strategy('file_system')
        model = model.share_memory()

    if model_ingredient_data["input_manifold"] == "Spherical":
        feature_manifold = RiemannianManifold.from_name_params(
            "SphericalManifold", None)
    else:
        feature_manifold = RiemannianManifold.from_name_params(
            "EuclideanManifold", None)

    shared_params = {
        "manifold": embed_manifold,
        "dimension": embed_manifold_dim,
        "objects": data.objects,
        "in_manifold": feature_manifold
    }
    if hasattr(model, "get_additional_embeddings"
               ) and model.get_additional_embeddings() is not None:
        optimizer = RiemannianSGD(
            [
                {
                    'params': model.get_savable_model().parameters()
                },
                # {'params': model.main_deltas.parameters(), 'lr':300},
                # {'params': model.additional_deltas.parameters(), 'lr':300},
                # {'params': curvature_scale[:2], 'lr':0.001},
                {
                    'params': model.get_additional_embeddings().parameters(),
                    'lr': get_fixed_embedding_lr()
                }
            ],
            lr=get_base_lr(),
            adam_for_euc=False)
        # optimizer = RiemannianSGD(list(model.get_savable_model().parameters()) + list(model.get_additional_embeddings().parameters()) + curvature_scale[1:], lr=get_base_lr(), adam_for_euc=False)
    else:
        optimizer = RiemannianSGD(
            [{
                'params': model.get_savable_model().parameters()
            }
             # {'params': curvature_scale[:2], 'lr':0.001}
             ],
            lr=get_base_lr(),
            adam_for_euc=False)
    lr_scheduler = get_lr_scheduler(optimizer)

    threads = []
    if train_threads > 1:
        try:
            for i in range(train_threads):
                args = [
                    device, model, embed_manifold, embed_manifold_dim, data,
                    optimizer, loss_params, n_epochs, eval_every,
                    sample_neighbors_every, lr_scheduler, shared_params, i,
                    feature_manifold, conformal_loss_params, tensorboard_watch,
                    eval_data
                ]
                threads.append(mp.Process(target=train, args=args))
                threads[-1].start()

            for thread in threads:
                thread.join()
        finally:
            for thread in threads:
                try:
                    thread.close()
                except:
                    thread.terminate()
            # embed_eval.close_thread(wait_to_finish=True)
            logging_thread.close_thread(wait_to_finish=True)

    else:
        args = [
            device, model, embed_manifold, embed_manifold_dim, data, optimizer,
            loss_params, n_epochs, eval_every, sample_neighbors_every,
            lr_scheduler, shared_params, 0, feature_manifold,
            conformal_loss_params, tensorboard_watch, eval_data
        ]
        try:
            train(*args)
        finally:
            # embed_eval.close_thread(wait_to_finish=True)
            logging_thread.close_thread(wait_to_finish=True)
Example #24
import logging
import math
import numpy as np
import torch.multiprocessing as mp
import sys
import matplotlib.pyplot as plt

from src.optimizer import optimize_parallel
from src.train import train_individual, train_individual_cpu, test
from src.train import train_parallel, train_serial
from src.model import build_model, build_mean, build_sigma
from src.util import mk_folder, save, load, setup_logging
from src.vbn import explore_for_vbn

# set up multiprocessing
mp.set_sharing_strategy("file_system")
# log and save path setting
torch.set_num_threads(1)


class ARGS(object):
    """
    Global shared setting.   
    """

    env_type = "atari"

    state_dim = 0
    action_dim = 0
    action_lim = 0
Example #25
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--DescEmb', action='store_true')
    parser.add_argument('--source_file',
                        choices=['mimic', 'eicu'],
                        type=str,
                        default='mimic')
    parser.add_argument('--test_file',
                        choices=['mimic', 'eicu', 'both'],
                        type=str,
                        default='eicu')
    parser.add_argument('--few_shot',
                        type=float,
                        choices=[0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
                        default=0.0)
    parser.add_argument('--target',
                        choices=[
                            'readmission', 'mortality', 'los>3day', 'los>7day',
                            'dx_depth1_unique'
                        ],
                        type=str,
                        default='readmission')
    parser.add_argument('--item', choices=['all'], type=str, default='med')
    parser.add_argument('--time_window',
                        choices=['12', '24', '36', '48', 'Total'],
                        type=str,
                        default='12')
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--dropout', type=float, default=0.3)
    parser.add_argument('--embedding_dim', type=int, default=128)
    parser.add_argument('--hidden_dim', type=int, default=256)
    parser.add_argument('--n_epochs', type=int, default=1000)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--max_length', type=str, default='150')
    parser.add_argument('--bert_model',
                        choices=[
                            'bio_clinical_bert', 'bio_bert', 'pubmed_bert',
                            'blue_bert', 'bert_mini', 'bert_tiny'
                        ],
                        type=str,
                        default='bio_bert')
    parser.add_argument('--input_path',
                        type=str,
                        default='/home/jylee/data/pretrained_ehr/input_data/',
                        help='data directory')
    parser.add_argument(
        '--path',
        type=str,
        default='/home/jylee/data/pretrained_ehr/output/KDD_output/',
        help='model parameter directory')
    parser.add_argument('--cls_freeze', action='store_true')
    args = parser.parse_args()

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    if args.source_file == args.test_file:
        assert args.few_shot == 0.0, "there is no few_shot if source and test file are the same"

    mp.set_sharing_strategy('file_system')

    SEED = [2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029]

    for seed in SEED:
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True

        args.seed = seed

        train_loader = get_test_dataloader(args=args, data_type='train')
        valid_loader = get_test_dataloader(args=args, data_type='eval')
        test_loader = get_test_dataloader(args=args, data_type='test')

        tester = Tester(args, train_loader, valid_loader, test_loader, device,
                        seed)

        if args.few_shot == 0.0:
            print('Only test')
            tester.zero_shot_test()
        else:
            print('Train then test')
            tester.train()