Example #1
def convert(topology, backend, device, extra_config=None):
    """
    Convert an `onnxconverter_common.topology.Topology` object into a *backend* model.

    Args:
        topology: The `onnxconverter_common.topology.Topology` object that will be converted into a backend model
        backend: Which backend the model should be run on
        device: Which device the translated model will be run on
        extra_config: Extra configurations to be used by individual operator converters

    Returns:
        A model implemented in the selected backend
    """
    assert topology is not None, "Cannot convert a Topology object of type None."
    assert backend is not None, "Cannot convert a Topology object into backend None."
    assert device is not None, "Cannot convert a Topology object into device None."

    # Avoid a shared mutable default: the config dict is modified below.
    if extra_config is None:
        extra_config = {}

    tvm_backend = None
    operator_map = {}

    if tvm_installed():
        import tvm
        from tvm import relay
        from tvm.contrib import graph_runtime

        tvm_backend = tvm.__name__

    for operator in topology.topological_operator_iterator():
        try:
            converter = get_converter(operator.type)

            if backend == onnx.__name__:
                # PyTorch <= 1.6.0 has a bug when exporting GEMM into ONNX, so
                # for the moment tree_trav is always used for the ONNX backend.
                # The version check is kept here (disabled) for reference:
                # vers = LooseVersion(torch.__version__)
                # allowed_min = LooseVersion("1.6.0")
                # if vers < allowed_min:
                extra_config[constants.TREE_IMPLEMENTATION] = "tree_trav"

            operator_map[operator.full_name] = converter(
                operator, device, extra_config)
        except ValueError:
            raise MissingConverter(
                "Unable to find converter for {} type {} with extra config: {}."
                .format(operator.type,
                        type(getattr(operator, "raw_model", None)),
                        extra_config))
        except Exception as e:
            raise e

    # Set the parameters for the model / container
    n_threads = extra_config.get(constants.N_THREADS)
    batch_size = extra_config.get(constants.BATCH_SIZE)

    # We set the number of threads for torch here to avoid errors in case we JIT.
    # We set intra-op concurrency while forcing inter-op execution to be sequential.
    # We can revise this later, but in general we don't have graphs requiring inter-op parallelism.
    if n_threads is not None:
        if torch.get_num_interop_threads() != 1:
            torch.set_num_interop_threads(1)
        torch.set_num_threads(n_threads)

    operators = list(topology.topological_operator_iterator())
    torch_model = _PyTorchBackendModel(topology.raw_model.input_names,
                                       topology.raw_model.output_names,
                                       operator_map, operators,
                                       extra_config).eval()

    if backend == onnx.__name__:
        onnx_model_name = output_model_name = None
        target_opset = 11

        # Set optional configuration options for ONNX if any.
        if constants.ONNX_OUTPUT_MODEL_NAME in extra_config:
            onnx_model_name = extra_config[constants.ONNX_OUTPUT_MODEL_NAME]
            output_model_name = onnx_model_name + ".onnx"
        if constants.ONNX_TARGET_OPSET in extra_config:
            target_opset = extra_config[constants.ONNX_TARGET_OPSET]
        if output_model_name is None:
            output_model_name = str(uuid4().hex) + ".onnx"

        # Put the tracing test input into the right format.
        batch_trace_input, _ = _get_trace_input_from_test_input(
            extra_config[constants.TEST_INPUT], batch_size)

        # Generate the ONNX models
        torch.onnx.export(
            torch_model,
            batch_trace_input,
            output_model_name,
            input_names=topology.raw_model.input_names,
            output_names=topology.raw_model.output_names,
            keep_initializers_as_inputs=False,
            opset_version=target_opset,
            do_constant_folding=True,
        )
        hb_model = onnx.load(output_model_name)
        os.remove(output_model_name)

        # Set the ONNX model name if any.
        if onnx_model_name is not None:
            hb_model.graph.name = onnx_model_name

        # Fix the model to use arbitrary batch dimensions
        def fix_dim(dim):
            updated = False
            if dim.HasField("dim_value"):
                dim.Clear()
                updated = True
                dim.dim_param = "sym"

            return updated

        def fix_value_info(value):
            num_fixed = 0
            if value.type.HasField("tensor_type"):
                shape = value.type.tensor_type.shape
                if shape:
                    dim = shape.dim[0]
                    if fix_dim(dim):
                        num_fixed += 1

            return num_fixed

        def fix_graph(graph):
            num_fixed = 0
            for input in graph.input:
                num_fixed += fix_value_info(input)

            for output in graph.output:
                num_fixed += fix_value_info(output)

            for node in graph.node:
                for attr in node.attribute:
                    if attr.HasField("g"):
                        num_fixed += fix_graph(attr.g)

            return num_fixed

        fix_graph(hb_model.graph)
    elif backend == tvm_backend:
        # First we need to generate the torchscript model.
        batch_trace_input, remainder_trace_input = _get_trace_input_from_test_input(
            extra_config[constants.TEST_INPUT], batch_size)
        ts_model = _jit_model(torch_model, batch_trace_input, "cpu",
                              extra_config)
        if remainder_trace_input is not None:
            remainder_ts_model = _jit_model(torch_model, remainder_trace_input,
                                            "cpu", extra_config)

        # Generate the test input in the TVM format. In case we have a remainder beyond the batch, generate a remainder test input as well.
        test_input = [(
            topology.raw_model.input_names[i],
            batch_trace_input[i].shape
            if type(batch_trace_input) is tuple else batch_trace_input.shape,
        ) for i in range(len(topology.raw_model.input_names))]
        if remainder_trace_input is not None:
            remainder_test_input = [(
                topology.raw_model.input_names[i],
                remainder_trace_input[i].shape
                if type(remainder_trace_input) is tuple else
                remainder_trace_input.shape,
            ) for i in range(len(topology.raw_model.input_names))]

        # Pick the proper target.
        if device == "cuda":
            target = tvm.target.cuda()
            ctx = tvm.gpu()
        elif device == "cpu":
            target = "llvm"
            ctx = tvm.cpu()
        elif "llvm" in device:
            target = device
            ctx = tvm.cpu()
        else:
            raise RuntimeError("Device {} not recognized".format(device))

        # Get configuration parameters.
        config = {}
        if constants.TVM_MAX_FUSE_DEPTH in extra_config:
            config["relay.FuseOps.max_depth"] = extra_config[
                constants.TVM_MAX_FUSE_DEPTH]
        else:
            # 50 is a good depth for operator fusion. More than that will probably hurt performance.
            # https://github.com/microsoft/hummingbird/issues/232#issuecomment-697979508
            config["relay.FuseOps.max_depth"] = 50

        # Create the relay version of the model.
        model, params = relay.frontend.from_pytorch(ts_model, test_input)
        if remainder_trace_input is not None:
            remainder_model, remainder_params = relay.frontend.from_pytorch(
                remainder_ts_model, remainder_test_input)

        # Generate the model. We set opt_level=3 to enable all optimizations.
        with tvm.transform.PassContext(opt_level=3, config=config):
            graph, lib, params = relay.build(model,
                                             target=target,
                                             params=params)
        tvm_model = graph_runtime.create(graph, lib, ctx)
        tvm_model.set_input(**params)
        if remainder_trace_input is not None:
            with tvm.transform.PassContext(opt_level=3, config=config):
                graph, lib, params = relay.build(remainder_model,
                                                 target=target,
                                                 params=remainder_params)
            tvm_remainder_model = graph_runtime.create(graph, lib, ctx)
            tvm_remainder_model.set_input(**params)

        # In the container we will be using the context to properly configure the input tensors.
        extra_config[constants.TVM_CONTEXT] = ctx
        extra_config[
            constants.TVM_INPUT_NAMES] = topology.raw_model.input_names
        if remainder_trace_input is not None:
            extra_config[constants.TVM_REMAINDER_MODEL] = tvm_remainder_model

        hb_model = tvm_model
    else:
        # Set the device for the model.
        if device != "cpu":
            if backend in (torch.__name__, torch.jit.__name__):
                torch_model = torch_model.to(device)

        # If the backend is torchscript, jit the model.
        if backend == torch.jit.__name__:
            trace_input, _ = _get_trace_input_from_test_input(
                extra_config[constants.TEST_INPUT], batch_size)
            if device != "cpu":
                trace_input = trace_input.to(device)
            torch_model = torch.jit.trace(torch_model, trace_input).eval()
            torch.jit.optimized_execution(torch_model)

        hb_model = torch_model

    # Return if the container is not needed.
    if constants.CONTAINER in extra_config and not extra_config[
            constants.CONTAINER]:
        return hb_model

    # We scan the operators backwards until we find an operator with a defined type.
    # This is necessary because ONNX models can have arbitrary operators doing casting, reshaping etc.
    idx = len(operators) - 1
    while (idx >= 0 and not operator_map[operators[idx].full_name].regression
           and not operator_map[operators[idx].full_name].classification
           and not operator_map[operators[idx].full_name].anomaly_detection
           and not operator_map[operators[idx].full_name].transformer):
        idx -= 1

    assert idx >= 0, "Cannot detect container type. Please file an issue at https://github.com/microsoft/hummingbird."

    # If it is a transformer, we need to check whether there is another operator type before it.
    # E.g., normalization after classification.
    tmp_idx = idx
    if operator_map[operators[idx].full_name].transformer:
        while (idx >= 0
               and not operator_map[operators[idx].full_name].regression
               and not operator_map[operators[idx].full_name].classification
               and
               not operator_map[operators[idx].full_name].anomaly_detection):
            idx -= 1
        if idx < 0:
            idx = tmp_idx

    # Get the proper container type.
    if operator_map[operators[idx].full_name].regression:
        # We are doing a regression task.
        if backend == torch.jit.__name__:
            container = TorchScriptSklearnContainerRegression
        elif backend == onnx.__name__:
            container = ONNXSklearnContainerRegression
        elif backend == tvm_backend:
            container = TVMSklearnContainerRegression
        else:
            container = PyTorchSklearnContainerRegression
    elif operator_map[operators[idx].full_name].anomaly_detection:
        # We are doing anomaly detection.
        if backend == torch.jit.__name__:
            container = TorchScriptSklearnContainerAnomalyDetection
        elif backend == onnx.__name__:
            container = ONNXSklearnContainerAnomalyDetection
        elif backend == tvm_backend:
            container = TVMSklearnContainerAnomalyDetection
        else:
            container = PyTorchSklearnContainerAnomalyDetection
    elif operator_map[operators[idx].full_name].transformer:
        # We are just transforming the input data.
        if backend == torch.jit.__name__:
            container = TorchScriptSklearnContainerTransformer
        elif backend == onnx.__name__:
            container = ONNXSklearnContainerTransformer
        elif backend == tvm_backend:
            container = TVMSklearnContainerTransformer
        else:
            container = PyTorchSklearnContainerTransformer
    else:
        # We are doing a classification task.
        if backend == torch.jit.__name__:
            container = TorchScriptSklearnContainerClassification
        elif backend == onnx.__name__:
            container = ONNXSklearnContainerClassification
        elif backend == tvm_backend:
            container = TVMSklearnContainerClassification
        else:
            container = PyTorchSklearnContainerClassification

    n_threads = extra_config.get(constants.N_THREADS)
    batch_size = extra_config.get(constants.BATCH_SIZE)
    hb_model = container(hb_model,
                         n_threads,
                         batch_size,
                         extra_config=extra_config)

    return hb_model
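For context, a minimal end-to-end sketch of the public entry point that wraps the internal convert() above (hedged: the scikit-learn model and data below are illustrative, and hummingbird.ml.convert is assumed to be the user-facing wrapper):

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from hummingbird.ml import convert

# Train a small scikit-learn model on synthetic data (illustrative only).
X, y = make_classification(n_samples=100, n_features=10, random_state=0)
skl_model = RandomForestClassifier(n_estimators=10, random_state=0).fit(X, y)

# Translate it into a PyTorch-backed container; extra_config keys such as
# constants.BATCH_SIZE and constants.N_THREADS feed the batching and threading
# logic shown in the function above.
hb_model = convert(skl_model, "torch", X)
print(hb_model.predict(X[:5]))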
Example #2
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=14,
                        metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr',
                        type=float,
                        default=1.0,
                        metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.7,
                        metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    parser.add_argument(
        '--num_cpus',
        type=int,
        default=1,
        metavar='N',
        help='number of CPU vCores to train with (default: 1)')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    print()
    print("Number of CPU vCores specified to be used {}".format(args.num_cpus))
    print("Total # of CPU threads on OS {}".format(os.cpu_count()))
    print("Total # of usable CPU threads on OS {}".format(
        len(os.sched_getaffinity(0))))

    print("Total # of Intra-op CPU threads - PyTorch {}".format(
        torch.get_num_threads()))
    print("Total # of Inter-op threads - PyTorch {}".format(
        torch.get_num_interop_threads()))
    print()
    print("Setting # of Intra-op and Inter-op CPU threads in PyTorch to {}".
          format(args.num_cpus))
    torch.set_num_threads(args.num_cpus)
    torch.set_num_interop_threads(args.num_cpus)
    print()
    print("Total # of Intra-op CPU threads - PyTorch {}".format(
        torch.get_num_threads()))
    print("Total # of Inter-op threads - PyTorch {}".format(
        torch.get_num_interop_threads()))
    print()

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)

    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        epoch_start = time.time()
        train(args, model, device, train_loader, optimizer, epoch)
        elapse_time = time.time() - epoch_start
        elapse_time = datetime.timedelta(seconds=elapse_time)
        print("Epoch training time {}".format(elapse_time))
        test(args, model, device, test_loader)
        scheduler.step()

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
Example #3
    print('[prepare data]')
    trainset, valset = Criteo.prepare_Criteo(root=args.dataset_root,
                                             min_threshold=args.min_threshold,
                                             val_split=args.val_split,
                                             n_jobs=os.cpu_count())

    print('[init process group]')
    # distributed.init_process_group(
    #     backend=args.backend,
    #     init_method=args.init_method,
    #     world_size=args.world_size,
    #     rank=args.rank
    # )
    torch.set_num_interop_threads(
        max(args.num_threads, torch.get_num_interop_threads()))
    torch.manual_seed(args.seed)

    print('[init dataloader]')
    trainloader = DataLoader(dataset=trainset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers,
                             drop_last=False)
    # trainloader = DataLoader(
    #     dataset=trainset,
    #     batch_size=args.batch_size,
    #     sampler=DistributedSampler(trainset),
    #     num_workers=args.num_workers
    # )
    # (Truncated in the original snippet; the remaining arguments below are
    # reconstructed to mirror the train loader above.)
    valloader = DataLoader(dataset=valset,
                           batch_size=args.batch_size,
                           shuffle=False,
                           num_workers=args.num_workers,
                           drop_last=False)
Example #4
	def __enter__(self):

		self.num_threads_env = torch.get_num_interop_threads()
		torch.set_num_interop_threads(self.num_threads_exe)
Example #5
# Creates a matrix
M_data = [[1., 2., 3.], [4., 5., 6]]
M = torch.tensor(M_data)
print(M)

# Random matrix 3x4x5
x = torch.randn((3, 4, 5))
print(x)

# OPERATIONS
x = torch.tensor([1., 2., 3.])
y = torch.tensor([4., 5., 6.])
z = x + y
print(z)

tStart = time.time()
for i in range(100000):
    t0 = torch.randn((100, 100))
    t1 = torch.randn((100, 100))
    t2 = torch.randn((100, 100))

    for j in range(10):
        t2 += t0 * t1

    # print(t2)

print(time.time() - tStart)
print(torch.get_num_interop_threads())
print(torch.device('cpu'))
Example #6
def ensure_num_interop_threads(n):

	if torch.get_num_interop_threads() < n:
		torch.set_num_interop_threads(n)

	return torch.get_num_interop_threads()
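A brief usage note (the call site below is an assumption, not part of the original snippet): PyTorch documents that set_num_interop_threads may be called at most once and only before any inter-op parallel work starts, so this helper is best invoked once, early in the program.

if __name__ == "__main__":
    # Illustrative: request at least 4 inter-op threads before any JIT / parallel work runs.
    n = ensure_num_interop_threads(4)
    print("inter-op threads in use:", n)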
Example #7
    def _setup(self, config):
        self.config = config
        print('NeuroCard config:')
        pprint.pprint(config)
        os.chdir(config['cwd'])
        for k, v in config.items():
            setattr(self, k, v)

        if config['__gpu'] == 0:
            torch.set_num_threads(config['__cpu'])

        # W&B.
        # Do wandb.init() after the os.chdir() above makes sure that the Git
        # diff file (diff.patch) is w.r.t. the directory where this file is in,
        # rather than w.r.t. Ray's package dir.
        wandb_project = config['__run']
        wandb.init(name=os.path.basename(
            self.logdir if self.logdir[-1] != '/' else self.logdir[:-1]),
                   sync_tensorboard=True,
                   config=config,
                   project=wandb_project)

        self.epoch = 0

        if isinstance(self.join_tables, int):
            # Hack to support training single-model tables.
            sorted_table_names = sorted(
                list(datasets.JoinOrderBenchmark.GetJobLightJoinKeys().keys()))
            self.join_tables = [sorted_table_names[self.join_tables]]

        # Try to make all the runs the same, except for input orderings.
        torch.manual_seed(0)
        np.random.seed(0)

        # Common attributes.
        self.loader = None
        self.join_spec = None
        join_iter_dataset = None
        table_primary_index = None

        # New datasets should be loaded here.
        assert self.dataset in ['imdb']
        if self.dataset == 'imdb':
            print('Training on Join({})'.format(self.join_tables))
            loaded_tables = []
            for t in self.join_tables:
                print('Loading', t)
                table = datasets.LoadImdb(t, use_cols=self.use_cols)
                table.data.info()
                loaded_tables.append(table)
            if len(self.join_tables) > 1:
                join_spec, join_iter_dataset, loader, table = self.MakeSamplerDatasetLoader(
                    loaded_tables)

                self.join_spec = join_spec
                self.train_data = join_iter_dataset
                self.loader = loader

                table_primary_index = [t.name for t in loaded_tables
                                       ].index('auth_user')

                table.cardinality = datasets.JoinOrderBenchmark.GetFullOuterCardinalityOrFail(
                    self.join_tables)
                self.train_data.cardinality = table.cardinality

                print('rows in full join', table.cardinality,
                      'cols in full join', len(table.columns), 'cols:', table)
            else:
                # Train on a single table.
                table = loaded_tables[0]

        if self.dataset != 'imdb' or len(self.join_tables) == 1:
            table.data.info()
            self.train_data = self.MakeTableDataset(table)

        self.table = table
        # Provide true cardinalities in a file or implement an oracle CardEst.
        self.oracle = None
        self.table_bits = 0

        # A fixed ordering?
        self.fixed_ordering = self.MakeOrdering(table)

        model = self.MakeModel(self.table,
                               self.train_data,
                               table_primary_index=table_primary_index)

        # NOTE: ReportModel()'s returned value is the true model size in
        # megabytes containing all *trainable* parameters.  As an implementation
        # convenience, the saved ckpts on disk have a slightly bigger footprint
        # due to saving non-trainable constants (the masks in each layer) as
        # well.  They can be deterministically reconstructed based on RNG seeds
        # and so should not be counted as model size.
        self.mb = train_utils.ReportModel(model)
        if not isinstance(model, transformer.Transformer):
            print('applying train_utils.weight_init()')
            model.apply(train_utils.weight_init)
        self.model = model

        if self.use_data_parallel:
            self.model = DataParallelPassthrough(self.model)

        wandb.watch(model, log='all')

        if self.use_transformer:
            opt = torch.optim.Adam(
                list(model.parameters()),
                2e-4,
                # betas=(0.9, 0.98),  # B in Lingvo; in Trfmr paper.
                betas=(0.9, 0.997),  # A in Lingvo.
                eps=1e-9,
            )
        else:
            if self.optimizer == 'adam':
                opt = torch.optim.Adam(list(model.parameters()), 2e-4)
            else:
                print('Using Adagrad')
                opt = torch.optim.Adagrad(list(model.parameters()), 2e-4)
        print('Optimizer:', opt)
        self.opt = opt

        total_steps = self.epochs * self.max_steps
        if self.lr_scheduler == 'CosineAnnealingLR':
            # Starts decaying to 0 immediately.
            self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                opt, total_steps)
        elif self.lr_scheduler == 'OneCycleLR':
            # Warms up to max_lr, then decays to ~0.
            self.lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
                opt, max_lr=2e-3, total_steps=total_steps)
        elif self.lr_scheduler is not None and self.lr_scheduler.startswith(
                'OneCycleLR-'):
            warmup_percentage = float(self.lr_scheduler.split('-')[-1])
            # Warms up to max_lr, then decays to ~0.
            self.lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
                opt,
                max_lr=2e-3,
                total_steps=total_steps,
                pct_start=warmup_percentage)
        elif self.lr_scheduler is not None and self.lr_scheduler.startswith(
                'wd_'):
            # Warmups and decays.
            splits = self.lr_scheduler.split('_')
            assert len(splits) == 3, splits
            lr, warmup_fraction = float(splits[1]), float(splits[2])
            self.custom_lr_lambda = train_utils.get_cosine_learning_rate_fn(
                total_steps,
                learning_rate=lr,
                min_learning_rate_mult=1e-5,
                constant_fraction=0.,
                warmup_fraction=warmup_fraction)
        else:
            assert self.lr_scheduler is None, self.lr_scheduler

        self.tbx_logger = tune_logger.TBXLogger(self.config, self.logdir)

        if self.checkpoint_to_load:
            self.LoadCheckpoint()

        self.loaded_queries = None
        self.oracle_cards = None
        if self.dataset == 'imdb' and len(self.join_tables) > 1:
            queries_job_format = utils.JobToQuery(self.queries_csv)
            self.loaded_queries, self.oracle_cards = utils.UnpackQueries(
                self.table, queries_job_format)  # parsing step; needs to be replaced
        timepre1 = time.time()
        print('Pretime:\n', "{:.2f}".format(timepre1 - gettimest1()))
        if config['__gpu'] == 0:
            print('CUDA not available, using # cpu cores for intra-op:',
                  torch.get_num_threads(), '; inter-op:',
                  torch.get_num_interop_threads())
 parser.add_argument("-e",
                     "--experiment",
                     dest="name",
                     default="default",
                     help="Experiment to run",
                     choices=CONFIGS.keys())
 parser.add_argument("-g",
                     "--num-gpus",
                     type=int,
                     default=torch.cuda.device_count(),
                     help="number of GPUs to use")
 parser.add_argument(
     "-n",
     "--num-cpus",
     type=int,
     default=torch.get_num_interop_threads(),
     help="number of CPUs to use when GPU is not available."),
 parser.add_argument("-r",
                     "--resume",
                     action="store_true",
                     help="Resume training from last known checkpoint")
 parser.add_argument("-j",
                     "--workers",
                     type=int,
                     default=6,
                     help="Number of dataloaders workers")
 parser.add_argument("-b",
                     "--backend",
                     choices=["nccl", "gloo"],
                     help="Pytorch Distributed backend",
                     default="nccl")
Example #9
    def load(location, do_unzip_and_model_type_check=True):
        """
        Method used to load a container from the file system.

        Args:
            location: The location on the file system where to load the model.
            do_unzip_and_model_type_check: Whether to unzip the model and check the type.

        Returns:
            The loaded model.
        """
        container = None

        # Unzip the dir.
        if do_unzip_and_model_type_check:
            zip_location = location
            if not location.endswith("zip"):
                zip_location = location + ".zip"
            else:
                location = zip_location[:-4]
            assert os.path.exists(
                zip_location), "Zip file {} does not exist.".format(
                    zip_location)
            shutil.unpack_archive(zip_location, location, format="zip")

            assert os.path.exists(
                location), "Model location {} does not exist.".format(location)

        # Load the model type.
        with open(os.path.join(location, constants.SAVE_LOAD_MODEL_TYPE_PATH),
                  "r") as file:
            model_type = file.readline()

        # Check the versions of the modules used when saving the model.
        if os.path.exists(
                os.path.join(location,
                             constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH)):
            with open(
                    os.path.join(location,
                                 constants.SAVE_LOAD_MODEL_CONFIGURATION_PATH),
                    "r") as file:
                configuration = file.readlines()
            check_dumped_versions(configuration, hummingbird, torch)
        else:
            warnings.warn(
                "Cannot find the configuration file with versions. You are likely trying to load a model saved with an old version of Hummingbird."
            )

        if model_type == "torch.jit":
            # This is a torch.jit model
            model = torch.jit.load(
                os.path.join(location, constants.SAVE_LOAD_TORCH_JIT_PATH))
            with open(os.path.join(location, "container.pkl"), "rb") as file:
                container = pickle.load(file)
            container._model = model
        elif model_type == "torch":
            # This is a PyTorch model
            with open(
                    os.path.join(location, constants.SAVE_LOAD_TORCH_JIT_PATH),
                    "rb") as file:
                container = pickle.load(file)
        else:
            shutil.rmtree(location)
            raise RuntimeError(
                "Model type {} not recognized".format(model_type))

        # Need to set the number of threads to use as set in the original container.
        if container._n_threads is not None:
            if torch.get_num_interop_threads() != 1:
                torch.set_num_interop_threads(1)
            torch.set_num_threads(container._n_threads)

        shutil.rmtree(location)
        return container
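Continuing the sketch from Example #1, a hedged save/load round trip for the loader above (assumptions: containers expose a save(location) method, and hummingbird.ml.load dispatches to container-specific load methods like the one shown):

from hummingbird.ml import convert, load

# Convert a trained scikit-learn model (see Example #1) and persist the container.
hb_model = convert(skl_model, "torch")
hb_model.save("my_hb_model")  # archived as my_hb_model.zip, per the unpack logic above

# Later, restore it; the saved thread settings are re-applied during load.
restored = load("my_hb_model")
print(restored.predict(X[:5]))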
Example #10
parser.add_argument("--config_files", type=str, nargs='+')
opt = parser.parse_args()

# Load Config Files
__C = Config()
for filename in opt.config_files:
    ic(filename)
    __C.add_from_dict(Config.parse_from_yml(filename))
    # ic(__C)
# ic(__C)

# Limit CPU usage
torch.set_num_threads(__C.CPU_THREADS)
torch.set_num_interop_threads(__C.CPU_THREADS)
print(colorama.Fore.GREEN + "Using %d/%d cores/threads of CPU" %
      (torch.get_num_threads(), torch.get_num_interop_threads()))

# config_file = "__C.YML"
# config = YAMLParser(config_file).data

# ------------------ configuration tests ----------------- #

assert __C.OPTIMIZER_TYPE in ["Adam", "SGD", "AdamW"]
assert __C.DATASET_TYPE in ["Cora", "Citeseer", "Pubmed"]
assert __C.MODEL_TYPE in ["DGCNN", "GCN", "SGC", "GCNII"]

# ---------------- general configurations ---------------- #

__C.NGPU = len(__C.GPU_IDS)
__C.PARALLEL = __C.NGPU > 1
__C.BATCH_SIZE = __C.BATCH_SIZE_SINGLE * __C.NGPU