def getImageTensors(target, events, imageLengths, ptsTimesValuesTensor, scaleSteps):
    '''
    Generates images (w2ds), accessory information (bin edges stepsValues and stepsTimes), and a mapping
    from pts to the image (valueBinIndices).

    target: key of ptsTimesValuesTensor whose tensor is handed to makeHistogramDecomposition as the
        target; the size of its first dimension (N) sets the length of valueBinIndices.
    events: sequence of event keys. Events missing from ptsTimesValuesTensor get None entries everywhere.
    imageLengths: number of time steps per event, indexed like events.
    ptsTimesValuesTensor: mapping from key to tensor; [i, :, 1] of an event tensor is what gets binned.
    scaleSteps: number of value steps, shared by every event.

    Returns (stepsValues, stepsTimes, w2ds, valueBinIndices, dim2d):
        stepsValues, stepsTimes, w2ds: object arrays of length len(events) holding whatever
            makeHistogramDecomposition returned for that event (or None).
        valueBinIndices: object array of length N; element i is an object array of length len(events)
            holding the result of valueTensorToValueBinTensor (or None).
        dim2d: float array (len(events), 2) of [value steps, time steps] per event.
    '''
    nEvents = len(events)

    dim2d = np.empty((nEvents, 2))
    for eventi in range(nEvents):
        dim2d[eventi, :] = np.array([scaleSteps, imageLengths[eventi]])  # value steps, time steps

    # Object arrays start out filled with None; every slot is assigned below, so no placeholder
    # tensors are needed (the old placeholder code indexed dim2d by event row and broke for one event).
    w2ds = np.empty(nEvents, np.ndarray)
    stepsValues = np.empty(nEvents, np.ndarray)
    stepsTimes = np.empty(nEvents, np.ndarray)

    hasData = [event in ptsTimesValuesTensor for event in events]
    # The Haar dictionary is only needed when at least one image is actually built.
    hd = jcw_pywavelets.create_haar_dictionary(10, Variable) if any(hasData) else None

    for eventi, event in enumerate(events):
        if hasData[eventi]:
            # Bin counts must be integers for numpy's histogram routines; dim2d itself stays float.
            w2ds[eventi], stepsValues[eventi], stepsTimes[eventi] = \
                makeHistogramDecomposition(ptsTimesValuesTensor[event], ptsTimesValuesTensor[target],
                                           bins=dim2d[eventi].astype(int), hd=hd)
        else:
            w2ds[eventi], stepsValues[eventi], stepsTimes[eventi] = None, None, None

    # N x event x Tmax
    N = ptsTimesValuesTensor[target].size()[0]
    valueBinIndices = np.empty(N, np.ndarray)
    for i in range(N):
        valueBinIndices[i] = np.empty(nEvents, np.ndarray)
        for eventi, event in enumerate(events):
            if not hasData[eventi]:
                valueBinIndices[i][eventi] = None
                continue
            try:
                valueBinIndices[i][eventi] = \
                    valueTensorToValueBinTensor(ptsTimesValuesTensor[event][i, :, 1], stepsValues[eventi])
            except Exception:
                # NOTE(review): debugging hook kept from the original; the slot stays None if
                # execution is resumed. KeyboardInterrupt/SystemExit are no longer intercepted.
                pdb.set_trace()

    # Reconstruct instruction for image k (inverse of the decomposition):
    #   w2ds[k].matmul(hd[log2(cols) - 1]).t().matmul(hd[log2(rows) - 1]).t()
    return stepsValues, stepsTimes, w2ds, valueBinIndices, dim2d
def makeCrossCorrelationTensor(eTVT, tTT, causal=True, bins=16, ranges=None, dim=1, hd=None):
    '''
    Histogram of (event time - target time) differences, projected onto a Haar dictionary.

    eTVT: array indexed [n, t, 0] for event times and [n, t, 1] for event values.
    tTT: array whose [n, t, 0] entries are the target times.
    causal: when True the upper bound of the automatic range is capped at 0 and inputs whose
        smallest difference is above that bound yield (None, None, None).
    bins: passed to np.histogram; an int or an array of edges. The number of resulting bins must be
        a power of two.
    ranges: passed to np.histogram as range; computed from the data when None.
    dim: only 1 is implemented; anything else returns (None, None, None).
    hd: Haar dictionary indexed by log2(number of bins) - 1. Built with
        jcw_pywavelets.create_haar_dictionary when None.

    Returns (hwave, histEdges, None) on success, (None, None, None) otherwise.
    '''
    # Only the time column of the target is used (also when it is the sole column).
    tTT = tTT[:, :, 0]
    nTargetSteps = tTT.shape[1]

    # NOTE(review): debugging hook kept from the original; it triggers on long event timelines,
    # presumably because the difference tensor below grows with Tmax squared.
    if eTVT.shape[1] > 50:
        pdb.set_trace()

    # Do computations. Can do sparsely in scipy if this becomes problematic
    dTV = eTVT[:, :, 0][:, None, :] - tTT[:, :, None]  # diffs: N x Ttarget x Tevent
    vTV = np.repeat(eTVT[:, :, 1][:, None, :], nTargetSteps, axis=1)  # values expanded, same shape
    dVTV = np.concatenate((dTV[:, :, :, None], vTV[:, :, :, None]), axis=3)  # ... x 2
    valued = dVTV.transpose((3, 1, 0, 2)).reshape(2, -1)  # row 0: diffs, row 1: values
    valued = valued[:, ~np.isnan(valued[0, :])]
    if valued.shape[1] == 0:
        return None, None, None

    dmin, dmax = valued[0, :].min(), valued[0, :].max()
    if causal:
        xmax = np.minimum(0., dmax)
        if dmin > xmax:
            return None, None, None
    else:
        xmax = dmax

    if ranges is None:
        ranges = [dmin, xmax]
        if dmin >= xmax:
            ranges[0] = xmax - 1  # causal applied to range

    if hd is None:
        if isinstance(bins, (int, np.integer)):
            hdsize = bins
        elif isinstance(bins, np.ndarray):
            hdsize = len(bins)
        else:
            print('Error in makeCrossCorrelationTensor bins parameter')
            return None, None, None
        hd = jcw_pywavelets.create_haar_dictionary(hdsize)

    if dim != 1:
        return None, None, None

    hist, histEdges = np.histogram(valued[0, :], bins=bins, range=ranges)
    # NaN differences were dropped above, so the column count is the number of valid differences.
    hist = hist / float(valued.shape[1])

    nBins = hist.shape[0]
    level = int(round(np.log2(nBins)))
    if level < 1 or 2 ** level != nBins:
        raise ValueError('makeCrossCorrelationTensor needs a power-of-two number of bins (>= 2), got %d'
                         % nBins)
    hwave = hist.dot(hd[level - 1].transpose())
    return hwave, histEdges, None
def timelineToWaveArrayTensors(target, ptsTimesValuesTensor, bins=16, ranges=None, causal=True):
    '''
    Runs makeCrossCorrelationTensor for every key of ptsTimesValuesTensor against the target entry.

    target: key of ptsTimesValuesTensor used as the target timeline.
    ptsTimesValuesTensor: mapping from event key to the array handed to makeCrossCorrelationTensor.
    bins: bin specification for every event, or a dict giving one per event key. None means 16.
    ranges: range for every event, or a dict giving one per event key.
    causal: forwarded to makeCrossCorrelationTensor.

    Returns a dict keyed like ptsTimesValuesTensor. Each value is None when no decomposition was
    produced, else {'x': histogram edges, 'wavelet': coefficients (requires_grad=True)}.
    '''
    waveArrayTensor = {}
    tts = ptsTimesValuesTensor[target]
    # The dictionary does not depend on the event, so build it once instead of once per iteration.
    hd = jcw_pywavelets.create_haar_dictionary(10)

    for event in tqdm(ptsTimesValuesTensor.keys()):
        if bins is None:
            b = 16
        elif isinstance(bins, dict):
            b = bins[event]
        else:
            b = bins
        r = ranges[event] if isinstance(ranges, dict) else ranges

        cc, ccEdges, _ = makeCrossCorrelationTensor(
            ptsTimesValuesTensor[event], tts, causal=causal, bins=b, ranges=r, hd=hd)

        if cc is None or ccEdges is None:
            waveArrayTensor[event] = None
        else:
            waveArrayTensor[event] = {
                'x': Variable(torch.from_numpy(ccEdges), requires_grad=False),
                'wavelet': Variable(torch.from_numpy(cc), requires_grad=True)
            }
    return waveArrayTensor
# ##### DATA: Target data and generator input data


def get_distribution_sampler(mu, sigma):
    """Return a function n -> (1, n) tensor of normal(mu, sigma) draws from numpy's global RNG."""

    def draw(n):
        # Gaussian
        samples = np.random.normal(mu, sigma, (1, n))
        return torch.Tensor(samples)

    return draw


def get_generator_input_sampler():
    """Return a function (m, n) -> m x n tensor from torch.rand."""

    def draw(m, n):
        # Uniform-dist data into generator, _NOT_ Gaussian
        return torch.rand(m, n)

    return draw


# Entries of the Haar dictionary, each converted with .float().
hd = [
    wavelet.float()
    for wavelet in jpw.create_haar_dictionary(10, vectorType=Variable).values()
]


def MakePermuteMatrix(ofsize):
    """Return the ofsize x ofsize identity matrix with its rows in a random order."""
    identity = torch.eye(ofsize)
    rowOrder = torch.LongTensor(np.random.permutation(ofsize))
    return Variable(identity[rowOrder, :])


def SignMatrix(ofsize):
    """Return a diagonal float matrix whose diagonal entries are +1 or -1."""
    # NOTE(review): the threshold is applied to randn (standard normal) draws, so +1 and -1 are
    # not equally likely -- confirm this is intended.
    aboveThreshold = torch.randn(ofsize) > 0.5
    signs = aboveThreshold.long() * 2 - 1
    return Variable(torch.diag(signs).float())


def subsample(matrix, subsample_stride=1, flip_long=True):
    # NOTE(review): only this first statement is visible here; the rest of the body appears to
    # be missing.
    sx = subsample_stride
def makeHistogramDecomposition(eventTimesValuesTensor, targetTimesTensor, bins=(32,128), causal=True, ranges=None,
                               logWhenPossible=True, hd=None):
    '''
    Takes eTVT (N x Tmax x 2: time, value) and tTT (N x Tmax x E, column 0 holding the target times)
    and returns the time-based cross-correlation 2d Haar decomposition.

    bins: (scaleBins, timeBins). Scalars are bin counts (coerced to int), arrays are bin edges.
        Both resulting bin counts must be powers of two.
    causal: cap the upper time bound at 0. It is applied to automatically derived ranges and to
        the upper time bound of a supplied ranges.
    ranges: [[valueMin, valueMax], [timeMin, timeMax]], or None to derive them from the data.
    logWhenPossible: use log2-spaced value edges when bins are counts and both the values and the
        lower value bound are positive.
    hd: Haar dictionary indexed by log2(size) - 1 whose entries support .t() and .cpu(). Built
        with jcw_pywavelets.create_haar_dictionary when None.

    Note output: (wdcp, value edges, time edges); wdcp has scaleBins rows and timeBins columns.
    Note fail output: (None, None, None).
    Note causal output without any non-positive time difference (automatic ranges only): a 1 x 1
    zero image with edge tensors of ones.
    '''
    # Convert out of Variables
    eTVT = eventTimesValuesTensor.data.numpy()
    # Only the time column of the target is used (also when it is the sole column).
    tTT = targetTimesTensor.data.numpy()[:, :, 0]
    nTargetSteps = tTT.shape[1]

    # Do computations. Can do sparsely in scipy if this becomes problematic
    dTV = eTVT[:, :, 0][:, None, :] - tTT[:, :, None]  # diffs: N x Ttarget x Tevent
    vTV = np.repeat(eTVT[:, :, 1][:, None, :], nTargetSteps, axis=1)  # values expanded, same shape
    dVTV = np.concatenate((dTV[:, :, :, None], vTV[:, :, :, None]), axis=3)  # ... x 2
    valued = dVTV.transpose((3, 1, 0, 2)).reshape(2, -1)  # row 0: time diffs, row 1: values
    valued = valued[:, ~np.isnan(valued[0, :])]
    valued = valued[:, ~np.isnan(valued[1, :])]
    if valued.shape[1] == 0:
        return None, None, None

    v0min, v0max = valued[0, :].min(), valued[0, :].max()
    v1min, v1max = valued[1, :].min(), valued[1, :].max()

    if ranges is None:  # set ranges
        if causal:
            xmax = np.minimum(0., v0max)
            if v0min > xmax:
                return Variable(torch.zeros(1, 1).double(), requires_grad=True), \
                    Variable(torch.ones(1).double(), requires_grad=False), \
                    Variable(torch.ones(1).double(), requires_grad=False)
        else:
            xmax = v0max
        if v1min == v1max:
            print('Warning, only one value detected in image construction (event value). Setting range at +/- 1 of value')
            v1min = v1min - 1
            v1max = v1max + 1
        if v0min == v0max or v0min == xmax:
            print('Warning, only one value detected in image construction (time). Setting range at +/- 1 of value')
            v0min = v0min - 1
            v0max = v0max + 1
            if causal:
                v0max = np.minimum(0, v0max)
            xmax = v0max
            if v0min >= v0max:
                v0min = v0max - 1
        ranges = [[v1min, v1max], [v0min, xmax]]
    else:
        ranges = np.array(ranges, dtype=float)  # private copy, the caller's ranges stay untouched
        if causal:
            ranges[1, 1] = np.minimum(0, ranges[1, 1])  # apply causality
        # Explicit bin edges below must follow the requested ranges, not the data extent.
        v1min, v1max = ranges[0]
        v0min, xmax = ranges[1]

    # numpy requires integer bin counts; callers may hand in floats.
    bins = [int(b) if np.ndim(b) == 0 else np.asarray(b) for b in bins]

    binsAreCounts = all(isinstance(b, int) for b in bins)
    # log2 edges are only defined for a positive lower value bound.
    if logWhenPossible and binsAreCounts and valued[1, :].min() > 0 and v1min > 0:
        bins = [np.power(2, np.linspace(np.log2(v1min), np.log2(v1max), bins[0] + 1)),
                np.linspace(v0min, xmax, bins[1] + 1)]

    if isinstance(bins[1], np.ndarray):
        if np.any(bins[1][1:] - bins[1][:-1] < 0):
            # NOTE(review): debugging hook kept from the original (time edges are decreasing).
            pdb.set_trace()
        if len(bins[1]) == 1:
            print('Warning, time bins input was 0. returning image None')
            return None, None, None
    elif bins[1] == 0:
        return None, None, None

    try:
        counts, vs, ts = np.histogram2d(valued[1, :], valued[0, :], bins=bins, range=ranges)  # TODO add weights
    except ValueError:
        print('Warning: despite checks, attempted to make histogram')
        print(valued[1, :], valued[0, :])
        return None, None, None
    # Normalise by the number of valid target times.
    counts = counts / np.count_nonzero(~np.isnan(tTT))

    haarIndex = []
    for size in counts.shape:
        level = int(round(np.log2(size)))
        if level < 1 or 2 ** level != size:
            raise ValueError('makeHistogramDecomposition needs power-of-two bin counts (>= 2), got %s'
                             % (counts.shape,))
        haarIndex.append(level - 1)

    if hd is None:
        # assumes create_haar_dictionary(L) provides numpy entries 0..L-1 -- TODO confirm
        rawHd = jcw_pywavelets.create_haar_dictionary(max(haarIndex) + 1)
        entries = rawHd.items() if isinstance(rawHd, dict) else enumerate(rawHd)
        hd = {key: Variable(torch.from_numpy(h).double(), requires_grad=False) for key, h in entries}

    wdcp = Variable(torch.from_numpy(counts).double(), requires_grad=False)
    # decompose is over rows first, then columns -> reconstruct needs to be columns first, then rows.
    wdcp = wdcp.t().matmul(hd[haarIndex[0]].t().cpu()).t().matmul(hd[haarIndex[1]].t().cpu())
    wdcp = Variable(wdcp.data, requires_grad=True)
    return wdcp, \
        Variable(torch.from_numpy(vs).double(), requires_grad=False), \
        Variable(torch.from_numpy(ts).double(), requires_grad=False)