Example #1
def train(config):
    '''
        Training function for EWM generator model.
    '''
    # Create python version of cpp operation
    # (Credit: Chen, arXiv:1906.03471, GitHub: https://github.com/chen0706/EWM)
    from torch.utils.cpp_extension import load
    my_ops = load(name = "my_ops",
                  sources = ["W1_extension/my_ops.cpp",
                             "W1_extension/my_ops_kernel.cu"],
                  verbose = False)
    import my_ops

    # Set up the GPU device; if this fails, try setting the CUDA_LAUNCH_BLOCKING environment variable
    device = torch.device(config['gpu'])

    # Get model kwargs
    ewm_kwargs = setup_model.ewm_kwargs(config)

    # Setup model on GPU
    G = ewm_G(**ewm_kwargs).to(device)
    G.weights_init()

    print(G)
    input('Press any key to launch')

    # Setup model optimizer
    model_params = {'g_params': G.parameters()}
    G_optim = utils.get_optim(config, model_params)

    # Set up full_dataloader (single batch)
    dataloader = utils.get_dataloader(config) # Full Dataloader
    dset_size  = len(dataloader)

    # Flatten the dataloader into a Tensor of shape [dset_size, l_dim]
    dataloader = dataloader.view(dset_size, -1).to(device)

    # Set up psi optimizer
    psi = torch.zeros(dset_size, device=device, requires_grad=True)
    psi_optim = torch.optim.Adam([psi], lr=config['psi_lr'])

    # Set up directories for saving training stats and outputs
    config = utils.directories(config)

    # Set up dict for saving checkpoints
    checkpoint_kwargs = {'G':G, 'G_optim':G_optim}

    # Standard deviation of the tessellation (noise) vectors (sqrt of the configured variance)
    tess_var = config['tess_var']**0.5
    
    # Compute the stopping criterion from a set of test vectors
    # by computing the 'ideal' loss between the test vectors and the target.
    print(line(60))
    print("Computing stopping criterion")
    print(line(60))
    stop_criterion = []
    test_loader = utils.get_test_loader(config)
    for _, test_vecs in enumerate(test_loader):
        # Add Gaussian noise to test_vectors
        test_vecs = test_vecs.view(config['batch_size'], -1).to(device) # 'Perfect' generator model
        t1 = tess_var*torch.randn(test_vecs.shape[0], test_vecs.shape[1]).to(device)
        test_vecs += t1
        # Add Gaussian noise to target data
        t2 = tess_var*torch.randn(dataloader.shape[0], dataloader.shape[1]).to(device)
        test_target  = dataloader + t2
        # Compute the stop score
        stop_score = my_ops.l1_t(test_vecs, test_target)
        stop_loss = -torch.mean(stop_score)
        stop_criterion.append(stop_loss.cpu().detach().numpy())
    del test_loader
    # Set stopping criterion variables
    stop_min, stop_mean, stop_max = np.min(stop_criterion), np.mean(stop_criterion), np.max(stop_criterion)
    print(line(60))
    print('Stop Criterion: min: {}, mean: {}, max: {}'.format(round(stop_min, 3), round(stop_mean, 3), round(stop_max, 3)))
    print(line(60))

    # Set up stats logging
    hist_dict = {'hist_min':[], 'hist_max':[], 'ot_loss':[]}
    losses    = {'ot_loss': [], 'fit_loss': []}
    history   = {'dset_size': dset_size, 'epoch': 0, 'iter': 0, 'losses'   : losses, 'hist_dict': hist_dict}
    config['early_end'] = (200, 320) # Empirical stopping criterion from EWM author
    stop_counter = 0
    
    # Set up progress bar for terminal output and enumeration
    epoch_bar  = tqdm(range(config['num_epochs']))
    
    # Training Loop
    for epoch, _ in enumerate(epoch_bar):

        history['epoch'] = epoch

        # Set up memory lists: 
        #     - mu: simple feed-forward distribution 
        #     - transfer: transfer plan given by lists of indices
        # Rule-of-thumb: do not save the tensors themselves: instead, save the
        #                data as a list and convert it to a tensor as needed.
        mu = [0] * config['mem_size']
        transfer = [0] * config['mem_size']
        mem_idx = 0

        # Compute the Optimal Transport Solver
        for ots_iter in range(1, dset_size//2):

            history['iter'] = ots_iter

            psi_optim.zero_grad()

            # Generate samples from feed-forward distribution
            z_batch = torch.randn(config['batch_size'], config['z_dim']).to(device)
            y_fake  = G(z_batch) # [B, dset_size]
            
            # Add Gaussian noise (tessellation vectors) to the generator output and to the data
            t1 = tess_var*torch.randn(y_fake.shape[0], y_fake.shape[1]).to(device)
            t2 = tess_var*torch.randn(dataloader.shape[0], dataloader.shape[1]).to(device)
            
            y_fake  += t1
            dataloader += t2
            
            # Compute the W1 distance between the model output and the target distribution
            score = my_ops.l1_t(y_fake, dataloader) - psi
            phi, hit = torch.max(score, 1)

            # Remove the tessellation from the dataloader
            dataloader -= t2
            
            # Standard loss computation
            # This loss defines the sample mean of the marginal distribution
            # of the dataset. This is the only computation that generalizes.
            loss = -torch.mean(psi[hit])

            # Backprop
            loss.backward()
            psi_optim.step()

            # Update memory tensors
            mu[mem_idx] = z_batch.data.cpu().numpy().tolist()
            transfer[mem_idx] = hit.data.cpu().numpy().tolist()
            mem_idx = (mem_idx + 1) % config['mem_size']

            # Update losses
            history['losses']['ot_loss'].append(loss.item())

            if (ots_iter % 500 == 0):
                avg_loss = np.mean(history['losses']['ot_loss'])
                print('OTS Iteration {} | Epoch {} | Avg Loss Value: {}'.format(ots_iter, epoch, round(avg_loss, 3)))
#             if (iter % 2000 == 0):
#                 # Display histogram stats
#                 hist_dict, stop = utils.update_histogram(transfer, history, config)
#                 # Empirical stopping criterion
#                 if stop:
#                     break

            if ots_iter > (dset_size//3):
                if  stop_min <= np.mean(history['losses']['ot_loss']) <= stop_max:
                    stop_counter += 1
                    break

        # Compute the Optimal Fitting Transport Plan
        for fit_iter in range(config['mem_size']):
            G_optim.zero_grad()

            # Retrieve stored batch of generated samples
            z_batch = torch.tensor(mu[fit_iter]).to(device)
            y_fake  = G(z_batch) # G'(z)
            
            # Get Transfer plan from OTS: T(G_{t-1}(z))
            t_plan = torch.tensor(transfer[fit_iter]).to(device)
            y0_hit = dataloader[t_plan].to(device)
            
#            Tesselate the output of the generator function and the data
#             t1 = tess_var*torch.randn(y_fake.shape[0], y_fake.shape[1]).to(device)
#             t2 = tess_var*torch.randn(y0_hit.shape[0], y0_hit.shape[1]).to(device)
            
#             y_fake *= t1
#             y0_hit *= t1
            
            # Compute Wasserstein distance between G and T
            G_loss = torch.mean(torch.abs(y0_hit - y_fake)) * config['l_dim']

            # Backprop
            G_loss.backward() # Gradient descent
            G_optim.step()

            # Update losses
            history['losses']['fit_loss'].append(G_loss.item())

            # Check if best loss value and save checkpoint
            if 'best_loss' not in history:
                history.update({ 'best_loss' : G_loss.item() })

            best = G_loss.item() < (history['best_loss'] * 0.70)
            if best:
                history['best_loss'] = G_loss.item()
                checkpoint = utils.get_checkpoint(history['epoch'], checkpoint_kwargs, config)
                utils.save_checkpoint(checkpoint, config)

            if (fit_iter % 500 == 0):
                avg_loss = np.mean(history['losses']['fit_loss'])
                print('FIT Iteration {} | Epoch {} | Avg Loss Value: {}'.format(fit_iter, epoch, round(avg_loss,3)))

    # Save a checkpoint at end of training
    checkpoint = utils.get_checkpoint(history['epoch'], checkpoint_kwargs, config)
    utils.save_checkpoint(checkpoint, config)

    # Save training data to csv's after training end
    utils.save_train_hist(history, config, times=None, histogram=history['hist_dict'])
    print("Stop Counter Triggered {} Times".format(stop_counter))

    # For Aiur
    print("I see you have an appetite for destruction.")
    print("And you have learned to use your illusion.")
    print("But I find your lack of control disturbing.")
Example #2
if torch.__version__.endswith('+cpu'):
    torch_version = version.parse(torch.__version__.rstrip('+cpu'))
else:
    torch_version = version.parse(torch.__version__)

try:
    __version__ = get_distribution(__name__).version
except DistributionNotFound:
    # package is not installed
    pass


if config.JIT_ENABLED:
    extensions_dir = os.path.join(pkg_dir, 'csrc')
    sources = glob.glob(os.path.join(extensions_dir, '*.cpp'))
    sources = [os.path.join(extensions_dir, s) for s in sources]

    try:
        cpp_extension.load(
            name='autograd_ste_ops',
            sources=sources,
            is_python_module=False,
            verbose=config.VERBOSE)
        NATIVE_STE_BACKEND_LOADED = True
    except:
        warnings.warn("Brevitas' native STE backend is enabled but couldn't be loaded")
        NATIVE_STE_BACKEND_LOADED = False
else:
    NATIVE_STE_BACKEND_LOADED = False
Example #3
# -*- coding: utf-8 -*-
"""
Created on Mon Oct  1 07:59:09 2018

@author: nsde
"""
#%%
import torch
from torch.utils.cpp_extension import load

#%%
if __name__ == '__main__':
    #   _dir = get_dir(__file__)

    # Compile cpu source
    cpab_cpu = load(name='cpab_cpu', sources=['CPAB_ops.cpp'], verbose=True)

    # Compule gpu source
    cpab_gpu = load(name='cpab_gpu',
                    sources=['CPAB_ops_cuda.cpp', 'CPAB_ops_cuda_kernel.cu'],
                    verbose=True)
Example #4
import torch

from torch.utils.cpp_extension import load
cd = load(name="cd",
          sources=[
              "./chamfer_distance/chamfer_distance.cpp",
              "./chamfer_distance/chamfer_distance.cu"
          ])


class ChamferDistanceFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, xyz1, xyz2):
        batchsize, n, _ = xyz1.size()
        _, m, _ = xyz2.size()
        xyz1 = xyz1.contiguous()
        xyz2 = xyz2.contiguous()
        dist1 = torch.zeros(batchsize, n)
        dist2 = torch.zeros(batchsize, m)

        idx1 = torch.zeros(batchsize, n, dtype=torch.int)
        idx2 = torch.zeros(batchsize, m, dtype=torch.int)

        if not xyz1.is_cuda:
            cd.forward(xyz1, xyz2, dist1, dist2, idx1, idx2)
        else:
            dist1 = dist1.cuda()
            dist2 = dist2.cuda()
            idx1 = idx1.cuda()
            idx2 = idx2.cuda()
            cd.forward_cuda(xyz1, xyz2, dist1, dist2, idx1, idx2)
Example #5
def load_cpp(name, files, path):
    os.makedirs(Config().model / 'qrnn', exist_ok=True)
    return cpp_extension.load(name=name,
                              sources=[path / f for f in files],
                              build_directory=Config().model / 'qrnn')
Example #6
#  Ref: https://github.com/chrdiller/pyTorchChamferDistance
import os, torch, torch.nn as nn
from torch.utils.cpp_extension import load

basedir = os.path.dirname(__file__)
cd = load(name="cd",
          sources=[
              os.path.join(basedir, "chamfer_distance.cpp"),
              os.path.join(basedir, "chamfer_distance.cu")
          ])


class ChamferDistanceFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, xyz1, xyz2):
        batchsize, n, _ = xyz1.size()
        _, m, _ = xyz2.size()
        xyz1 = xyz1.contiguous()
        xyz2 = xyz2.contiguous()
        dist1 = torch.zeros(batchsize, n)
        dist2 = torch.zeros(batchsize, m)

        idx1 = torch.zeros(batchsize, n, dtype=torch.int)
        idx2 = torch.zeros(batchsize, m, dtype=torch.int)

        if not xyz1.is_cuda:
            cd.forward(xyz1, xyz2, dist1, dist2, idx1, idx2)
        else:
            dist1 = dist1.cuda()
            dist2 = dist2.cuda()
            idx1 = idx1.cuda()
Example #7
import os

import torch
from torch import nn
from torch.nn import functional as F
from torch.autograd import Function
from torch.utils.cpp_extension import load

module_path = os.path.dirname(__file__)
fused = load(
    "fused",
    sources=[
        os.path.join(module_path, "fused_bias_act.cpp"),
        os.path.join(module_path, "fused_bias_act_kernel.cu"),
    ],
)


class FusedLeakyReLUFunctionBackward(Function):
    @staticmethod
    def forward(ctx, grad_output, out, bias, negative_slope, scale):
        ctx.save_for_backward(out)
        ctx.negative_slope = negative_slope
        ctx.scale = scale

        empty = grad_output.new_empty(0)

        grad_input = fused.fused_bias_act(grad_output, empty, out, 3, 1,
                                          negative_slope, scale)

        dim = [0]
Example #8
import importlib
import os

from torch.autograd import Function

chamfer_found = importlib.find_loader("chamfer_6D") is not None
if not chamfer_found:
    ## Cool trick from https://github.com/chrdiller
    print("Jitting Chamfer 6D")
    cur_path = os.path.dirname(os.path.abspath(__file__))
    build_path = cur_path.replace('chamfer6D', 'tmp')
    os.makedirs(build_path, exist_ok=True)

    from torch.utils.cpp_extension import load
    chamfer_6D = load(name="chamfer_6D",
                      sources=[
                          "/".join(
                              os.path.abspath(__file__).split('/')[:-1] +
                              ["chamfer_cuda.cpp"]),
                          "/".join(
                              os.path.abspath(__file__).split('/')[:-1] +
                              ["chamfer6D.cu"]),
                      ],
                      build_directory=build_path)
    print("Loaded JIT 6D CUDA chamfer distance")

else:
    import chamfer_6D
    print("Loaded compiled 6D CUDA chamfer distance")


# Chamfer's distance module @thibaultgroueix
# GPU tensors only
class chamfer_6DFunction(Function):
    @staticmethod
Example #9
# CXX=g++-4.9 CC=gcc-4.9 python jit.py
from torch.utils.cpp_extension import load
import os

cur_dir = os.path.abspath(os.path.dirname(__file__))
gpu_flow = load(
    "gpu_flow",
    ["gpu_flow.cpp", "gpu_flow_kernel.cu"],
    build_directory=cur_dir,
    verbose=True,
    extra_cuda_cflags=[
        "-arch=sm_52", "--ptxas-options=-v", "-c", "--compiler-options",
        "'-fPIC'"
    ],  # sm_35, sm_61
)
help(gpu_flow)
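A supplementary sketch, not part of the example above: instead of hard-coding "-arch=sm_52", the compute capability of the locally installed GPU can be queried with torch.cuda.get_device_capability and passed through extra_cuda_cflags; the source file names in the commented call are placeholders.

import torch

# Query the installed GPU's compute capability, e.g. (7, 5) -> "-arch=sm_75"
major, minor = torch.cuda.get_device_capability(0)
arch_flag = "-arch=sm_{}{}".format(major, minor)
print(arch_flag)

# Hypothetical call, mirroring the example above with the derived flag:
# gpu_flow = load("gpu_flow", ["gpu_flow.cpp", "gpu_flow_kernel.cu"],
#                 extra_cuda_cflags=[arch_flag, "--ptxas-options=-v"], verbose=True)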
Example #10
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function

# import neural_renderer.cuda.rasterize as rasterize_cuda
from torch.utils.cpp_extension import load
rasterize_cuda = load(name='rasterize_cuda',
                      sources=[os.path.join(os.path.dirname(__file__), 'cuda/rasterize_cuda.cpp'),
                               os.path.join(os.path.dirname(__file__), 'cuda/rasterize_cuda_kernel.cu')])

DEFAULT_IMAGE_SIZE = 256
DEFAULT_ANTI_ALIASING = True
DEFAULT_NEAR = 0.1
DEFAULT_FAR = 100
DEFAULT_EPS = 1e-4
DEFAULT_BACKGROUND_COLOR = (0, 0, 0)

class RasterizeFunction(Function):
    '''
    Definition of differentiable rasterize operation
    Some parts of the code are implemented in CUDA
    Currently implemented only for cuda Tensors
    '''
    @staticmethod
    def forward(ctx, faces, textures, image_size, near, far, eps, background_color,
                return_rgb=False, return_alpha=False, return_depth=False):
        '''
        Forward pass
Example #11
import torch
from torch.autograd import Function
from torch.nn.modules.module import Module
from torch.autograd import Variable 
import os
from torch.autograd.function import once_differentiable

torch_ver = torch.__version__[:3]

if torch_ver=="0.4":
    from torch.utils.cpp_extension import load   
    build_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),'../cppcuda/build/'))

    print('compiling/loading roi_align')
    roialign = load(name='roialign',sources=['lib/cppcuda/roi_align_binding.cpp',
                                            'lib/cppcuda/roi_align_forward_cuda.cu',
                                            'lib/cppcuda/roi_align_backward_cuda.cu'],
                    build_directory=build_path,verbose=True)
else:
    import cppcuda_cffi.roialign as roialign


class RoIAlignFunction(Function):
    # def __init__(ctx, pooled_height, pooled_width, spatial_scale, sampling_ratio):
    #     ctx.pooled_width = int(pooled_width)
    #     ctx.pooled_height = int(pooled_height)
    #     ctx.spatial_scale = float(spatial_scale)
    #     ctx.sampling_ratio = int(sampling_ratio)
    #     ctx.features_size = None
    #     ctx.rois=None

    @staticmethod  
Example #12
from torch.utils.cpp_extension import load

calc_assoc_cuda = load('calc_assoc_cuda',
                       ['calc_assoc_cuda.cpp', 'calc_assoc_cuda_kernel.cu'],
                       verbose=True)
help(calc_assoc_cuda)
Example #13
import torch.autograd as autograd
import torch.cuda.comm as comm
import torch.nn.functional as F
from torch.autograd.function import once_differentiable
from torch.utils.cpp_extension import load
import os, time
import functools
from torch.autograd import Variable
curr_dir = os.path.dirname(os.path.abspath(__file__))
_src_path = os.path.join(curr_dir, "src")
_build_path = os.path.join(curr_dir, "build")
os.makedirs(_build_path, exist_ok=True)
rcca = load(name="rcca",
            extra_cflags=["-O3"],
            build_directory=_build_path,
            verbose=True,
            sources = [os.path.join(_src_path, f) for f in [
                "lib_cffi.cpp", "ca.cu"
                ]],
            extra_cuda_cflags=["--expt-extended-lambda"])

def _check_contiguous(*args):
    if not all([mod is None or mod.is_contiguous() for mod in args]):
        raise ValueError("Non-contiguous input")


class CA_Weight(autograd.Function):
    @staticmethod
    def forward(ctx, t, f):
        # Save context
        n, c, h, w = t.size()
        size = (n, h+w-1, h, w)
Example #14
#Copyright (c) Facebook, Inc. and its affiliates.
#All rights reserved.

#This source code is licensed under the license found in the
#LICENSE file in the root directory of this source tree.
import torch
from torch.utils.cpp_extension import load
cd = load(name="cd",
          sources=[
              "../third_party_code/chamfer_distance.cpp",
              "../third_party_code/chamfer_distance.cu"
          ])


class ChamferDistanceFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, xyz1, xyz2):
        batchsize, n, _ = xyz1.size()
        _, m, _ = xyz2.size()
        xyz1 = xyz1.contiguous()
        xyz2 = xyz2.contiguous()
        dist1 = torch.zeros(batchsize, n)
        dist2 = torch.zeros(batchsize, m)

        idx1 = torch.zeros(batchsize, n, dtype=torch.int)
        idx2 = torch.zeros(batchsize, m, dtype=torch.int)

        dist1 = dist1.cuda()
        dist2 = dist2.cuda()
        idx1 = idx1.cuda()
        idx2 = idx2.cuda()
Example #15
import os
import socket

import torch
from torch.nn import functional as F
from torch.autograd import Function
from torch.utils.cpp_extension import load

module_path = os.path.dirname(__file__)
gpu_name = "".join(torch.cuda.get_device_name().split(" "))
build_dir = os.path.join(
    module_path,
    ".build_cache_{}_PT{}_cu{}_gpu{}".format(socket.gethostname(),
                                             torch.__version__,
                                             torch.version.cuda, gpu_name))
if not os.path.exists(build_dir): os.makedirs(build_dir)
upfirdn2d_op = load(
    "upfirdn2d",
    sources=[
        os.path.join(module_path, "upfirdn2d.cpp"),
        os.path.join(module_path, "upfirdn2d_kernel.cu"),
    ],
    build_directory=build_dir,
)


class UpFirDn2dBackward(Function):
    @staticmethod
    def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad,
                in_size, out_size):

        up_x, up_y = up
        down_x, down_y = down
        g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad

        grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1)
Example #16
from os import path
import torch
import torch.autograd as autograd
import torch.cuda.comm as comm
from torch.autograd.function import once_differentiable
from torch.utils.cpp_extension import load
from torch import nn
from torch.nn import functional as F

_src_path = path.dirname(path.abspath(__file__))
_ext = load(
    name="incenter_match_build",
    extra_cflags=["-O3"],
    sources=[path.join(_src_path, f) for f in [
        "vanilla.cpp",
        "vanilla.cu",
    ]],
    extra_cuda_cflags=[
        "--expt-extended-lambda  -D_MWAITXINTRIN_H_INCLUDED -D__STRICT_ANSI__"
    ],
)


def _check_contiguous(*args):
    if not all([mod is None or mod.is_contiguous() for mod in args]):
        raise ValueError("Non-contiguous input")


# weight : N,S,H,W
class Vanilla_Weight(autograd.Function):
    @staticmethod
Example #17
from os import path

import torch.autograd as autograd
import torch.cuda.comm as comm
from torch.autograd.function import once_differentiable
from torch.utils.cpp_extension import load

_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
_backend = load(name="inplace_abn",
                extra_cflags=["-O3"],
                sources=[path.join(_src_path, f) for f in [
                    "inplace_abn.cpp",
                    "inplace_abn_cpu.cpp",
                    "inplace_abn_cuda.cu"
                ]],
                extra_cuda_cflags=["--expt-extended-lambda"])

# Activation names
ACT_LEAKY_RELU = "leaky_relu"
ACT_ELU = "elu"
ACT_NONE = "none"


def _check(fn, *args, **kwargs):
    success = fn(*args, **kwargs)
    if not success:
        raise RuntimeError("CUDA Error encountered in {}".format(fn))


def _broadcast_shape(x):
    out_size = []
Example #18
from torch.utils.cpp_extension import load
lltm_cpp = load(name="lltm_cpp", sources=["lltm.cpp"], verbose=True)
help(lltm_cpp)
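A related sketch (not from the tutorial code above): torch.utils.cpp_extension.load_inline follows the same JIT pattern but compiles a C++ source string directly, so no separate .cpp file is needed; the tiny scaled_add op below is only an illustration.

import torch
from torch.utils.cpp_extension import load_inline

cpp_source = """
#include <torch/extension.h>

torch::Tensor scaled_add(torch::Tensor a, torch::Tensor b, double alpha) {
    return a + alpha * b;
}
"""

# Bindings for the listed functions are generated automatically.
ext = load_inline(name="scaled_add_inline",
                  cpp_sources=cpp_source,
                  functions=["scaled_add"],
                  verbose=True)

print(ext.scaled_add(torch.ones(3), torch.ones(3), 0.5))  # tensor([1.5000, 1.5000, 1.5000])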
Example #19
import os

import torch
from torch.nn import functional as F
from torch.autograd import Function
from torch.utils.cpp_extension import load

module_path = os.path.dirname(__file__)
upfirdn2d_op = load(
    "upfirdn2d",
    sources=[
        os.path.join(module_path, "upfirdn2d.cpp"),
        os.path.join(module_path, "upfirdn2d_kernel.cu"),
    ],
)


class UpFirDn2dBackward(Function):
    @staticmethod
    def forward(ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad,
                in_size, out_size):

        up_x, up_y = up
        down_x, down_y = down
        g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad

        grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1)

        grad_input = upfirdn2d_op.upfirdn2d(
            grad_output,
            grad_kernel,
Example #20
    # tf.backends.cudnn.enabled = False
    #tf.backends.cudnn.benchmark = True
    MULTIPLIER = tf.cuda.device_count()
else:
    MULTIPLIER = 1
#if (hvd.rank() == 0):
if (True):
    f_out = open("./PREDICT.OUT", "w")
    f_out.close()

FREEZE_MODEL = tf.load("./freeze_model.pytorch", map_location=device)

"""Load coordinates, sym_coordinates, energy, force, type, n_atoms and parameters"""
script_path = sys.path[0]
if (device != tf.device('cpu')):
    comput_descrpt_and_deriv = load(name="test_from_cpp", sources=[script_path + "/comput_descrpt_deriv.cu"], verbose=True)
else:
    comput_descrpt_and_deriv = load(name="test_from_cpp", sources=[script_path + "/comput_descrpt_deriv.cpp", script_path + "/../../c/Utilities.cpp"], verbose=True, extra_cflags=["-fopenmp", "-O2"])
parameters_from_bin = FREEZE_MODEL['parameters']
parameters_from_file = Parameters()
read_parameters_flag = read_parameters(parameters_from_file)
parameters_from_file_adapt_bin = Parameters()
read_parameters_flag = read_parameters(parameters_from_file_adapt_bin)
parameters = parameters_from_file
print("All parameters:")
print(parameters)

COORD_Reshape_tf, SYM_COORD_Reshape_tf, ENERGY_tf, FORCE_Reshape_tf, N_ATOMS_tf, TYPE_Reshape_tf, NEI_IDX_Reshape_tf, \
NEI_COORD_Reshape_tf, FRAME_IDX_tf, SYM_COORD_DX_Reshape_tf, SYM_COORD_DY_Reshape_tf, SYM_COORD_DZ_Reshape_tf, \
N_ATOMS_ORI_tf, NEI_TYPE_Reshape_tf= read_and_init_bin_file(parameters_from_file, default_dtype=default_dtype, is_predict=1)
Example #21
import os

import torch
from torch import nn
from torch.autograd import Function
from torch.utils.cpp_extension import load

module_path = os.path.dirname(__file__)
fused = load(
    'fused',
    sources=[
        os.path.join(module_path, 'fused_bias_act.cpp'),
        os.path.join(module_path, 'fused_bias_act_kernel.cu'),
    ],
)


class FusedLeakyReLUFunctionBackward(Function):
    @staticmethod
    def forward(ctx, grad_output, out, negative_slope, scale):
        ctx.save_for_backward(out)
        ctx.negative_slope = negative_slope
        ctx.scale = scale

        empty = grad_output.new_empty(0)

        grad_input = fused.fused_bias_act(grad_output, empty, out, 3, 1,
                                          negative_slope, scale)

        dim = [0]
Example #22
    import warnings
    import os
    from torch.utils.cpp_extension import load
    warnings.warn("Unable to load pointops_cuda cpp extension.")
    pointops_cuda_src = os.path.join(os.path.dirname(__file__), "../src")
    pointops_cuda = load('pointops_cuda', [
        pointops_cuda_src + '/pointops_api.cpp',
        pointops_cuda_src + '/ballquery/ballquery_cuda.cpp',
        pointops_cuda_src + '/ballquery/ballquery_cuda_kernel.cu',
        pointops_cuda_src + '/knnquery/knnquery_cuda.cpp',
        pointops_cuda_src + '/knnquery/knnquery_cuda_kernel.cu',
        pointops_cuda_src + '/knnquery_heap/knnquery_heap_cuda.cpp',
        pointops_cuda_src + '/knnquery_heap/knnquery_heap_cuda_kernel.cu',
        pointops_cuda_src + '/grouping/grouping_cuda.cpp',
        pointops_cuda_src + '/grouping/grouping_cuda_kernel.cu',
        pointops_cuda_src + '/grouping_int/grouping_int_cuda.cpp',
        pointops_cuda_src + '/grouping_int/grouping_int_cuda_kernel.cu',
        pointops_cuda_src + '/interpolation/interpolation_cuda.cpp',
        pointops_cuda_src + '/interpolation/interpolation_cuda_kernel.cu',
        pointops_cuda_src + '/sampling/sampling_cuda.cpp',
        pointops_cuda_src + '/sampling/sampling_cuda_kernel.cu',
        pointops_cuda_src + '/labelstat/labelstat_cuda.cpp',
        pointops_cuda_src + '/labelstat/labelstat_cuda_kernel.cu',
        pointops_cuda_src + '/featuredistribute/featuredistribute_cuda.cpp',
        pointops_cuda_src + '/featuredistribute/featuredistribute_cuda_kernel.cu'
    ], build_directory=pointops_cuda_src, verbose=False)


class FurthestSampling(Function):
    @staticmethod
    def forward(ctx, xyz, m):
Example #23
def train_MNIST(config):
    # Create python version of cpp operation
    if config['dist'] == 'W1':
        print("Building C++ extension for W1 (requires PyTorch >= 1.0.0)...")
        from torch.utils.cpp_extension import load
        my_ops = load(name="my_ops",
                      sources=[
                          "W1_extension/my_ops.cpp",
                          "W1_extension/my_ops_kernel.cu"
                      ],
                      verbose=False)
        import my_ops
        print("Building complete")

    # Centralize stats logging
    times, losses, hist_dict, checkpt = utils.centralized_logs()

    # Select device
    device = torch.device(config['gpu'])

    # Update config dict with MNIST params and get MNIST dataset as one batch
    # config, mnist_data = utils.MNIST(config)
    '''
        Returns MNIST training data as a single batch of data
    '''
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.5, ), (0.5, ))])
    dataset = dset.MNIST(root=config['data_root'],
                         train=True,
                         download=True,
                         transform=transform)

    def get_data(dataset):
        full_dataloader = torch.utils.data.DataLoader(dataset,
                                                      batch_size=len(dataset))
        for y_batch, l_batch in full_dataloader:
            return y_batch

    y_t = get_data(dataset)
    n_dim = len(y_t)
    mnist_data = y_t.view(n_dim, -1).to(device)

    config.update({
        'dset_size': n_dim,
        'imsize': 28,
        'out_dim': 784,
        'batch_size': 64,
        'sample_size': 16,
        'early_end': (200, 320)
    })

    # Set MLP architecture
    G_arch = utils.get_mlp_arch(config)

    # Create G_model
    G_mlp = utils.get_model(G_arch, device)

    # Get optimizer
    opt_G = utils.get_optim(G_arch, G_mlp.parameters(), MNIST=True)
    print(G_mlp)

    # Initialize G_model weights and get the number of layers
    # G_mlp = utils.weights_init(G_mlp, MNIST=True)
    def initialize_weights(net):
        for m in net.modules():
            if isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.02)
            if hasattr(m, "bias") and m.bias is not None:
                m.bias.data.zero_()

    initialize_weights(G_mlp)

    # Create labels for experiment
    labels = utils.create_labels(config)

    # Update config with labels and save locations
    config = utils.update_with_labels(config, labels)

    # Setup psi optimizer
    psi = torch.zeros(n_dim, requires_grad=True, device=device)
    opt_psi = torch.optim.Adam([psi], lr=1e-1)

    # Set fixed noise vector for testing
    z_fixed = utils.get_z(config, device, sample=False)
    z_fixed.resize_((config['batch_size'], config['z_dim'])).to(device)

    # Training loop
    for epoch in range(config['num_epochs']):

        epoch_start_time = time.time()

        # Save list of losses for end-training determination
        loss_memory = []

        # Set up memory tensors: simple feed-forward distribution, transfer plan
        mu = torch.zeros(config['mem_size'], config['batch_size'],
                         config['z_dim'])
        transfer = torch.zeros(config['mem_size'],
                               config['batch_size'],
                               dtype=torch.long)
        mem_idx = 0

        # Compute Optimal Transport Solver (OTS) over every training example
        ot_start = time.time()

        # for ots_iter in range(0, config['dset_size']):
        for iter in range(1, 20001):

            opt_psi.zero_grad()

            # Generate samples from feed-forward distribution
            z_batch = utils.get_z(config, device, sample=False)
            z_batch.resize_((config['batch_size'], config['z_dim'])).to(device)
            y_fake = G_mlp(z_batch)  # [B, n_dim]

            # Compute cost between sample batch and target distribution
            if (config['dist'] == 'W1'):
                score = -my_ops.l1_t(y_fake, mnist_data) - psi
            else:
                score = torch.matmul(
                    y_fake, mnist_data.t()) - psi  # score: [B, N], psi: [N]

            phi, hit = torch.max(score, 1)
            # phi, hit = torch.min(score, 1) # [B], [B]

            # Wasserstein distance computation: d(x,y)^p
            if (config['dist'] == 'W1'):
                loss_primal = torch.mean(
                    torch.abs(y_fake - mnist_data[hit])) * config['out_dim']
            else:
                loss_primal = torch.mean(
                    (y_fake - mnist_data[hit])**2) * config['out_dim']

            # Loss computation
            # loss = (torch.mean(phi) + torch.mean(psi)) # Testing this
            loss = -torch.mean(psi[hit])  # equiv. to loss?

            # Backprop
            loss.backward()  # Gradient ascent
            opt_psi.step()

            # Append losses to dict
            losses['ot_loss'].append(loss.item())
            losses['w2_estim'].append(loss_primal.item())

            # Update memory tensors
            mu[mem_idx] = z_batch
            transfer[mem_idx] = hit
            mem_idx = (mem_idx + 1) % config['mem_size']

            if (iter % 500 == 0):
                print('OTS Iteration {} | Epoch {}'.format(iter, epoch))
            if (iter % 2000 == 0):
                # Display histogram stats
                hist_dict, stop = utils.update_histogram(
                    transfer, n_dim, epoch, iter, config, losses, hist_dict)
                # Empirical stopping criterion
                if stop:
                    break

        # Compute OTS time and append
        ot_end = time.time()
        times['ot_time'].append(ot_end - ot_start)

        # Compute Fitting Optimal Transport Plan (FIT)
        fit_start = time.time()
        for fit_iter in range(config['mem_size']):

            opt_G.zero_grad()

            # Get stored batch of generated samples
            z_batch = mu[fit_iter].to(device)
            y_fake = G_mlp(z_batch)  # G'(z)

            # Get Transfer plan from OTS: T(G_{t-1}(z))
            y0_hit = mnist_data[transfer[fit_iter].to(device)]

            # Compute Wasserstein distance between G and T
            if (config['dist'] == 'W1'):
                loss_g = torch.mean(
                    torch.abs(y0_hit - y_fake)) * config['out_dim']
            else:
                loss_g = torch.mean((y0_hit - y_fake)**2) * config['out_dim']

            # Backprop
            loss_g.backward()  # Gradient descent
            opt_G.step()

            # Append losses to dict
            losses['g_loss'].append(loss_g.item())
            loss_memory.append(loss_g.item())

            if (fit_iter % 500 == 0):
                print(
                    'Fit_iter: {} | Epoch: {} | Loss: {:.2f} | Best Loss: {:.2f}'
                    .format(fit_iter, epoch, loss_g, checkpt['best']))

            # Check if best loss value and save checkpoint
            # threshold = (checkpt['best'] - round(checkpt['best']*0.5))
            # best = ( loss_g.item() < threshold )
            # if best:
            #     checkpt['best'] = loss_g.item()
            #     chkpt_dict = utils.checkpoint_dict(fit_iter, epoch, G_mlp, opt_G)
            #     utils.save_checkpoint(chkpt_dict, best, epoch, -1, config['weights_root'])

            # Save periodic checkpoint
            # if (fit_iter % 2000 == 0):
            #     chkpt_dict = utils.checkpoint_dict(fit_iter, epoch, G_mlp, opt_G)
            #     utils.save_checkpoint(chkpt_dict, False, epoch, fit_iter, config['weights_root'])

            # Get random sample from G
            if (fit_iter % 1000 == 0):
                z_rand = utils.get_z(config, device, sample=True)
                z_rand.resize_(
                    (config['sample_size'], config['z_dim'])).to(device)
                sample = G_mlp(z_rand).view(-1, 1, config['imsize'],
                                            config['imsize'])
                utils.save_sample(sample, epoch, fit_iter, config['random'])

            # Check if loss is changing - stop training if no change
            if (len(loss_memory) > (config['mem_size'] // 2)):
                if ((loss_g <= (mean(loss_memory)*.999)) and \
                    (loss_g >= (mean(loss_memory)*.995))):
                    break

        # Compute FIT time
        fit_end = time.time()
        times['fit_time'].append(fit_end - fit_start)

        # Compute epoch time
        times['epoch_times'].append(time.time() - epoch_start_time)

        # Output to terminal
        print('Best loss:  {}'.format(checkpt['best']))
        print('Epoch_time: {}'.format(time.time() - epoch_start_time))
        print('Num epochs: {}'.format(epoch))
        print("FIT loss: {:.2f}".format(np.mean(losses['g_loss'])))

        # Save fixed sample at end of training epoch
        sample = G_mlp(z_fixed).view(-1, 1, config['imsize'], config['imsize'])
        utils.save_sample(sample, epoch, 0, config['fixed'])

    # Save training data to csv after training completion
    utils.save_stats(times, losses, hist_dict, G_arch, config['save_root'])
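A toy, self-contained sketch of the OTS dual update used in the W1 branch above, with torch.cdist(p=1) standing in for the custom my_ops.l1_t kernel (an assumption about its role, not the repository's actual op) and random tensors standing in for the MNIST batch and the generator output.

import torch

target = torch.randn(100, 784)                       # stand-in for mnist_data [N, out_dim]
psi = torch.zeros(100, requires_grad=True)           # dual variable, one entry per data point
opt_psi = torch.optim.Adam([psi], lr=1e-1)

for _ in range(200):
    opt_psi.zero_grad()
    y_fake = torch.randn(64, 784)                     # stand-in for G_mlp(z_batch) [B, out_dim]
    score = -torch.cdist(y_fake, target, p=1) - psi   # [B, N]: negative L1 cost minus dual
    phi, hit = torch.max(score, 1)                    # assigned target index for each sample
    loss = -torch.mean(psi[hit])                      # gradient ascent on the dual objective
    loss.backward()
    opt_psi.step()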
Example #24
import math

import torch
import torch.nn as nn
from torch.utils.cpp_extension import load

tr_cuda = load('tr_cuda', ['kernels/tr_cuda.cpp', 'kernels/tr_cuda_kernel.cu'])


def hese(number):
    '''
    Applies HESE encoding on a number.
    Returns the power-of-two exponents in the encoding.
    '''
    char_number = bin(number).split('b')[1]
    if bin(number)[0] == '-':
        sign = -1
    else:
        sign = 1
    char_number = '0' + char_number + '0'
    char_number = char_number[::-1]
    exponents = []
    for i in range(len(char_number) - 1):
        b1 = char_number[i]
        b2 = char_number[i + 1]
        if b1 == b2:
            continue
        if b1 == '0':
            exponents.append(-sign * 2**i)
        else:
            exponents.append(sign * 2**i)
Example #25
import torch.autograd
from torch.autograd import Function
from torch.utils.cpp_extension import load
import os

base_path = os.getcwd()

line_variance_parallel = load(
    name="line_variance_parallel",
    sources=[
        os.path.join(
            base_path,
            "layers/DefGrid/variance_function_atom/line_distance_func_parallel/variance_line_distance_for.cu"
        ),
        os.path.join(
            base_path,
            "layers/DefGrid/variance_function_atom/line_distance_func_parallel/variance_line_distance_back.cu"
        ),
        os.path.join(
            base_path,
            "layers/DefGrid/variance_function_atom/line_distance_func_parallel/variance_line_distance.cpp"
        )
    ],
    verbose=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
############################################
eps = 1e-8
debug = False


############################################
Example #26
Just be sure to import torch first, as this will resolve some symbols 
that the dynamic linker must see;
"""
import math
import torch
import time
""" method 1: Building with setuptools 
# Our module!
from build.lib import lltm_cpp
"""
""" JIT Compiling Extensions: just in time, JIT """
from torch.utils.cpp_extension import load

lltm_cpp = load(
    name="lltm_cpp",
    sources=["lltm.cpp"],
    #verbose = False
    verbose=True)


class LLTMFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input, weights, bias, old_h, old_cell):
        outputs = lltm_cpp.forward(input, weights, bias, old_h, old_cell)
        new_h, new_cell = outputs[:2]
        variables = outputs[1:] + [weights]
        ctx.save_for_backward(*variables)

        return new_h, new_cell

    @staticmethod
Example #27
import torch
from torch.autograd import Function
from torch.nn import Module
from torch.utils.cpp_extension import load

lib = load(
    name="depthflowprojection_cuda",
    sources=[
        "DAIN/helper/DepthFlowProjection/depthflowprojection_cuda.cc",
        "DAIN/helper/DepthFlowProjection/depthflowprojection_cuda_kernel.cu"
    ],
    verbose=True,
)


class DepthFlowProjectionLayer(Function):
    def __init__(self, requires_grad):
        super(DepthFlowProjectionLayer, self).__init__()

    @staticmethod
    def forward(ctx, input1, input2, requires_grad):
        assert input1.is_contiguous()
        assert input2.is_contiguous()
        fillhole = 1 if requires_grad == False else 0

        if input1.is_cuda:
            count = (torch.cuda.FloatTensor().resize_(input1.size(0), 1,
                                                      input1.size(2),
                                                      input1.size(3)).zero_())
            output = torch.cuda.FloatTensor().resize_(input1.size()).zero_()
            err = lib.DepthFlowProjectionLayer_gpu_forward(
Example #28
from os import path
import torch
import torch.distributed as dist
import torch.autograd as autograd
import torch.cuda.comm as comm
from torch.autograd.function import once_differentiable
from torch.utils.cpp_extension import load

_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
_backend = load(name="inplace_abn",
                extra_cflags=["-O3"],
                sources=[
                    path.join(_src_path, f) for f in [
                        "inplace_abn.cpp", "inplace_abn_cpu.cpp",
                        "inplace_abn_cuda.cu", "inplace_abn_cuda_half.cu"
                    ]
                ],
                extra_cuda_cflags=["--expt-extended-lambda"])

# Activation names
ACT_RELU = "relu"
ACT_LEAKY_RELU = "leaky_relu"
ACT_ELU = "elu"
ACT_NONE = "none"


def _check(fn, *args, **kwargs):
    success = fn(*args, **kwargs)
    if not success:
        raise RuntimeError("CUDA Error encountered in {}".format(fn))
Example #29
from pathlib import Path
import os, sys
_srcdir = Path(__file__).resolve().parent
_build_dir = Path.home() / "tmp"

from torch.utils.cpp_extension import load, verify_ninja_availability
try:
    verify_ninja_availability()
except:
    os.environ['PATH'] = str(Path(
        sys.executable).parent) + ":" + os.environ['PATH']

print("Compiling npp extension")
if (_build_dir / "lock").exists():
    print("Warning: found %s, compilation may hang here" %
          (_build_dir / "lock"))
nppig_cpp = load(verbose=False,
                 name="nppig_cpp",
                 sources=[_srcdir / "nppig.cpp"],
                 extra_ldflags=['-lnppc', '-lnppig'],
                 with_cuda=True,
                 build_directory=_build_dir)
print("done")
Example #30
from os import path

import torch.autograd as autograd
import torch.cuda.comm as comm
from torch.autograd.function import once_differentiable
from torch.utils.cpp_extension import load

_src_path = path.join(path.dirname(path.abspath(__file__)), "src")
_backend = load(
    name="inplace_abn",
    #                extra_cflags=["-O3"],
    extra_cflags=["-O3 -D_GLIBCXX_USE_CXX11_ABI=0"],
    sources=[
        path.join(_src_path, f) for f in
        ["inplace_abn.cpp", "inplace_abn_cpu.cpp", "inplace_abn_cuda.cu"]
    ],
    extra_cuda_cflags=["--expt-extended-lambda"])

# Activation names
ACT_RELU = "relu"
ACT_LEAKY_RELU = "leaky_relu"
ACT_ELU = "elu"
ACT_NONE = "none"


def _check(fn, *args, **kwargs):
    success = fn(*args, **kwargs)
    if not success:
        raise RuntimeError("CUDA Error encountered in {}".format(fn))

Example #31
import os

import torch
from torch.autograd import Function
from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast
from torch.utils.cpp_extension import load

from lib.extensions.syncbn.comm import SyncMaster

torch_ver = torch.__version__[:3]

print('compiling/loading syncbn')
build_path = '/tmp/bulid/syncbn'
if not os.path.exists(build_path):
    os.makedirs(build_path)

syncbn = load(name='syncbn',
              sources=[
                  'lib/extensions/syncbn/src/operator.cpp',
                  'lib/extensions/syncbn/src/syncbn_kernel.cu'
              ],
              build_directory=build_path,
              verbose=True)


def sum_square(input):
    r"""Calculate sum of elements and sum of squares for Batch Normalization"""
    return _sum_square.apply(input)


class _sum_square(Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        if input.is_cuda:
Example #32
import torch

from torch.utils.cpp_extension import load

cd = load(
    name="cd",
    sources=[
        "/home/user/point-normals-upsampling/pyTorchChamferDistance/chamfer_distance/chamfer_distance.cpp",
        "/home/user/point-normals-upsampling/pyTorchChamferDistance/chamfer_distance/chamfer_distance.cu"
    ])


class ChamferDistanceFunction(torch.autograd.Function):
    @staticmethod
    def forward(ctx, xyz1, xyz2):
        batchsize, n, _ = xyz1.size()
        _, m, _ = xyz2.size()
        xyz1 = xyz1.contiguous()
        xyz2 = xyz2.contiguous()
        dist1 = torch.zeros(batchsize, n)
        dist2 = torch.zeros(batchsize, m)

        idx1 = torch.zeros(batchsize, n, dtype=torch.int)
        idx2 = torch.zeros(batchsize, m, dtype=torch.int)

        if not xyz1.is_cuda:
            cd.forward(xyz1, xyz2, dist1, dist2, idx1, idx2)
        else:
            dist1 = dist1.cuda()
            dist2 = dist2.cuda()
            idx1 = idx1.cuda()
Example #33
# Uses code from https://github.com/cooooorn/Pytorch-XNOR-Net

from modules.base import *
from torch.nn.parameter import Parameter
from torch.utils.cpp_extension import load
boolop_cuda = load(name="boolop_cuda",
                   sources=[
                       "extensions/booleanOperations.cpp",
                       "extensions/booleanOperationsCuda.cu"
                   ])
from extensions import booleanOperations
import cupy


class AsType(nn.Module):
    def __init__(self, dtype):
        super(AsType, self).__init__()
        self.dtype = dtype

    def forward(self, x):
        return x.type(self.dtype)


class ExtractBits(nn.Module):
    def __init__(self, dtype):
        nn.Module.__init__(self)
        self.dtype = dtype
        self.bitlength = torch.iinfo(dtype).bits
        self.mask = 2**torch.arange(self.bitlength, dtype=dtype)

    def forward(self, input):
Example #34
from torch.utils.cpp_extension import load
lltm_cuda = load(
    'lltm_cuda', ['lltm_cuda.cpp', 'lltm_cuda_kernel.cu'], verbose=True)
help(lltm_cuda)