def newload(filename, verbose=1):
    """ Intended to replace load() in numpy """
    from numpy import load as loadz
    from numpy import cumsum  # may be referenced by the exec'd expressions below
    dic = loadz(filename)
    # if dic['version'] != None:
    # if len((dic.files=='version').nonzero())>0:
    if len(dic.files) > 3:
        if verbose > 2: print("local v%d " % (dic['version'])),  # py2-style trailing comma: no newline
    else:
        if verbose > 2: print("local v0: simple "),
        return dic  # quick, minimal return
    if verbose > 2: print(' contains %s' % dic.files)
    signalexpr = dic['signalexpr']
    timebaseexpr = dic['timebaseexpr']
    # savez saves ARRAYS always, so have to turn array back into scalar
    exec(signalexpr.tolist())
    exec(timebaseexpr.tolist())
    return {"signal": signal,
            "timebase": timebase,
            "parent_element": dic['parent_element']}
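# Usage sketch (hedged): the .npz name below is a hypothetical example of a
# file written by the matching save-compress routine; for v>0 files newload
# returns a dict with 'signal', 'timebase' and 'parent_element' keys.
if __name__ == '__main__':
    dat = newload('27233_mirnov_1.npz', verbose=3)  # hypothetical file
    print(dat['signal'][:5], dat['timebase'][:5])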
def load_knowledge(net, filepath):
    """ Load the weights and biases of an already-built network from disk. """
    from numpy import load as loadz  # loadz was used but never imported in the original
    reloaded = loadz(filepath)
    knowledge = [(name, reloaded[name]) for name in sorted(reloaded.keys())]
    set_knowledge(net, knowledge)
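# Hedged sketch of the save-side counterpart implied by load_knowledge().
# save_knowledge and get_knowledge are hypothetical names; the only thing
# grounded above is that knowledge is a list of (name, array) pairs.
def save_knowledge(net, filepath):
    import numpy as np
    knowledge = get_knowledge(net)  # hypothetical accessor returning (name, array) pairs
    np.savez(filepath, **dict(knowledge))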
def test_compress(file=None, verbose=0, eps=0, debug=False, maxcount=0):
    """ Used in developing the save compress routines.
    Not tested since then
    >>> test_compress()
    Looks like it only saves the time series, not the rest.
    """
    from numpy import load as loadz
    if file is None:
        file = '18993_densitymediaIR.npz'  # on drive-n-go
    print("Testing %s" % file)
    test = loadz(file)
    stat = os.stat(file)
    if verbose > 0:
        print("=========== testing signal compression ===========")
    sig = discretise_array(test['signal'], eps=eps, verbose=verbose, maxcount=maxcount)
    if verbose > 0:
        print("=========== testing timebase compression ===========")
    tim = discretise_array(test['timebase'], eps=eps, verbose=verbose)
    print(' File length %d bytes, %d samples, %.3g bytes/sample' %
          (stat.st_size, len(sig['iarr']), float(stat.st_size) / len(sig['iarr'])))
    temp = 'temp.npz'
    savez(temp, sig['iarr'])
    print(" compressed to %d bytes" % os.stat(temp).st_size)
    savez(temp, diff(sig['iarr']))
    print(" differences compressed to %d bytes" % os.stat(temp).st_size)
    savez(temp, diff(diff(sig['iarr'])))
    print(" double differences compressed to %d bytes" % os.stat(temp).st_size)
    savez(temp, tim['iarr'])  # restored: the original printed the time size without saving first
    print(" time compressed to %d bytes" % os.stat(temp).st_size)
    savez(temp, diff(tim['iarr']))
    print(" difference compressed to %d" % os.stat(temp).st_size)
    savez(temp, diff(diff(tim['iarr'])))
    print(" double difference compressed to %d" % os.stat(temp).st_size)
    if debug:
        xx = 1 / 0  # deliberate error to drop into the debugger
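# Toy check (a sketch, independent of discretise_array) of the idea probed
# above: a slowly varying integer series compresses much better after
# differencing, because the diffs take far fewer distinct values.
def _diff_compression_demo():
    import io
    import numpy as np
    def npz_size(arr):
        buf = io.BytesIO()
        np.savez_compressed(buf, arr)
        return buf.getbuffer().nbytes
    x = np.cumsum(np.random.randint(0, 3, 100000)).astype(np.int32)
    return npz_size(x), npz_size(np.diff(x))  # the diff'd version is smaller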
def deserialize(filename, axes):
    xydata = np.load(filename)  # was np.loadz, which does not exist in numpy
    ax_lines_xydata = {}
    # keys are of the form "ax_<i>_line_<j>_<x|y>"
    for key, val in xydata.items():
        _, istr, _, jstr, xy = key.split("_")
        i, j = int(istr), int(jstr)
        ax_lines_xydata.setdefault(i, {}).setdefault(j, {})[xy] = val
    return ax_lines_xydata
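# Hedged sketch of the matching writer: serialize() is a hypothetical
# counterpart that emits the "ax_<i>_line_<j>_<x|y>" keys parsed above,
# assuming matplotlib Axes objects.
def serialize(filename, axes):
    import numpy as np
    xydata = {}
    for i, ax in enumerate(axes):
        for j, line in enumerate(ax.get_lines()):
            xydata["ax_%d_line_%d_x" % (i, j)] = line.get_xdata()
            xydata["ax_%d_line_%d_y" % (i, j)] = line.get_ydata()
    np.savez(filename, **xydata)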
def load_fft_cache(self, fname):
    a = np.load(fname)  # was np.loadz, which does not exist in numpy
    self._aps_dd = a['dd']
    self._aps_dv = a['dv']
    self._aps_vv = a['vv']
    self._aps_cache = True
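# Hedged sketch of the matching writer (save_fft_cache is a hypothetical
# name); it assumes only the same 'dd', 'dv', 'vv' keys read back above.
def save_fft_cache(self, fname):
    np.savez(fname, dd=self._aps_dd, dv=self._aps_dv, vv=self._aps_vv)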
def show_npz(path):
    data = np.load(path)  # was np.loadz; np.load returns a dict-like NpzFile
    # if a KeyError occurs, print data.keys() first
    first = data["arr_0"]
    second = data["arr_1"]
    third = data["arr_2"]
    show_img(first)
    show_img(second)
    show_img(third)
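# A more defensive variant (a sketch): iterate whatever the archive holds
# instead of assuming exactly the default names arr_0, arr_1, arr_2.
def show_npz_all(path):
    data = np.load(path)
    for key in data.files:  # NpzFile.files lists the stored array names
        show_img(data[key])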
def newloadv3(filename, verbose=1):
    """ This is the version that works in python 3, but can't handle NaNs.
    Intended to replace load() in numpy
    counterpart is data/savez_compress.py
    """
    from numpy import load as loadz
    from numpy import cumsum, array
    dic = loadz(filename)
    # if dic['version'] != None:
    # if len((dic.files=='version').nonzero())>0:
    if len(dic.files) > 3:
        if verbose > 2: print("local v%d " % (dic['version']), end='')
    else:
        if verbose > 2: print("local v0: simple ", end='')
        return dic  # quick, minimal return
    if verbose > 2: print(' contains %s' % dic.files)
    signalexpr = dic['signalexpr']
    timebaseexpr = dic['timebaseexpr']
    if 'time_unit_in_seconds' in dic:
        timeunitexpr = dic['time_unit_in_seconds']
    else:
        timeunitexpr = array(1)
    # savez saves ARRAYS always, so have to turn array back into scalar
    # exec(signalexpr.tolist())
    # Changed exec to eval for python3, otherwise the name was not defined
    # for the target variables - they could only be accessed with
    # e.g. locals()['signal']
    # retdic = {"signal":locals()['signal'], "timebase":locals()['timebase'],
    #           "parent_element": dic['parent_element']}
    # Success using eval instead of exec
    signal = eval(signalexpr.tolist().split(b'=')[1])
    time_unit_in_seconds = timeunitexpr.tolist()
    timebase = time_unit_in_seconds * eval(timebaseexpr.tolist().split(b'=')[1])
    retdic = {"signal": signal, "timebase": timebase,
              "parent_element": dic['parent_element'],
              "params": dic['params'].tolist()}
    return retdic
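# Toy illustration (values made up) of the eval-based reconstruction above:
# the file stores a small assignment string, and evaluating only its
# right-hand side rebuilds the scaled array without exec's python-3
# local-scope problem.
def _eval_trick_demo():
    import numpy as np
    dic = {'rawsignal': np.array([0, 1, 2], dtype=np.int16)}
    signalexpr = b"signal=dic['rawsignal']*0.01"
    signal = eval(signalexpr.split(b'=')[1])  # evaluate only the RHS
    return signal  # array([0., 0.01, 0.02])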
def main():
    argument_parser = ArgumentParser()
    argument_parser.add_argument("--matrix", type=str, required=True)
    argument_parser.add_argument(
        "--method", type=str, required=False,
        choices=['zca', 'pca', 'cholesky', 'zca_cor', 'pca_cor'],
        default='zca')
    args = argument_parser.parse_args()
    matrix_path: str = args.matrix
    matrix_whitened = None
    if matrix_path.endswith(".npy"):
        matrix_whitened = whiten(np.load(matrix_path), method=args.method)
    elif matrix_path.endswith(".npz"):
        # np.loadz does not exist; a .npz that needs .toarray() is a SciPy
        # sparse matrix, read back with scipy.sparse.load_npz
        from scipy import sparse
        matrix_whitened = whiten(sparse.load_npz(matrix_path).toarray(),
                                 method=args.method)
    output_path = matrix_path[:-4] + f"_whitened-{args.method}.npy"
    np.save(output_path, matrix_whitened)
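# Usage sketch: whiten's (matrix, method=...) signature is inferred only from
# the calls above; the sanity check encodes an assumption about what a
# whitening transform does.
def _whiten_demo():
    import numpy as np
    X = np.random.randn(1000, 8)
    Xw = whiten(X, method='zca')
    # after whitening, the empirical covariance should be near the identity
    return np.cov(Xw, rowvar=False).round(2)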
def newload(filename, verbose=verbose):
    """ Intended to replace load() in numpy
    This is being used with nan data.  The version in data/base.py is closer
    to python 3 compatible, but can't deal with the nans yet.
    """
    from numpy import load as loadz
    from numpy import cumsum  # may be referenced by the exec'd expressions below
    dic = loadz(filename)
    # if dic['version'] != None:
    # if len((dic.files=='version').nonzero())>0:
    if len(dic.files) > 3:
        if verbose > 2: print("local v%d " % (dic['version'])),
    else:
        if verbose > 2: print("local v0: simple "),
        return dic  # quick, minimal return
    if verbose > 2: print(' contains %s' % dic.files)
    # savez saves ARRAYS always, so have to turn array back into scalar
    signalexpr = dic['signalexpr'].tolist()
    timebaseexpr = dic['timebaseexpr'].tolist()
    # fixup for files written with np.nan removal and cumsum
    if ('cumsum' in timebaseexpr) and ('np.nan' in timebaseexpr):
        print('!!!!!!!!!!!! faking a fixup of nans with cumsum !!!!!!!!!!!!!!!!!!')
        timebaseexpr = timebaseexpr.replace("timebase=", "temp=").replace(
            "*2e-06", "\ntimebase=temp*2e-06")
        timebaseexpr = timebaseexpr.replace("== dic['rawtimebase']", "== temp")
    exec(signalexpr)
    if dic['version'] <= 103 and timebaseexpr.startswith('timebase=0+'):  # bdb103
        timebaseexpr = timebaseexpr.replace('timebase=0+', 'timebase=0.+')
    exec(timebaseexpr)
    retdic = {"signal": signal, "timebase": timebase,
              "parent_element": dic['parent_element']}
    if 'params' in dic:
        retdic.update({"params": dic['params'].tolist()})
    return retdic
def newload(filename, verbose=verbose):
    """ Intended to replace load() in numpy
    This is being used with nan data.  The version in data/base.py is closer
    to python 3 compatible, but can't deal with the nans yet.
    """
    from numpy import load as loadz
    from numpy import cumsum  # may be referenced by the exec'd expressions below
    dic = loadz(filename, allow_pickle=True)
    # if dic['version'] != None:
    # if len((dic.files=='version').nonzero())>0:
    if len(dic.files) > 3:
        if verbose > 2:
            print("local v%d " % (dic['version'])),
    else:
        if verbose > 2:
            print("local v0: simple "),
        return dic  # quick, minimal return
    if verbose > 2:
        print(' contains %s' % dic.files)
    # savez saves ARRAYS always, so have to turn array back into scalar
    signalexpr = dic['signalexpr'].tolist()
    timebaseexpr = dic['timebaseexpr'].tolist()
    exec(signalexpr)
    # fixup for (old) files written with both np.nan removal and cumsum
    # e.g. "timebase=0+dic['rawtimebase']*0.0064514\nmaxint = np.iinfo(dic['rawtimebase'].dtype).max\nwnan = np.where(maxint == dic['rawtimebase'])[0]\nif len(wnan)>0:\n    timebase[wnan]=np.nan"
    # the problem here is that the maxint value is scaled BEFORE comparison.
    # the fix is to scale after comparison
    #
    # A correct solution is (without delta_encode_time)
    # perhaps the only files with this error also have a cumsum?
    # timebase=0+dic['rawtimebase']*0.0064514
    # maxint = np.iinfo(dic['rawtimebase'].dtype).max
    # wnan = np.where(maxint == dic['rawtimebase'])[0]
    # if len(wnan)>0:
    #     timebase[wnan]=np.nan
    # *Without* care to avoid nans in cumsum
    if ('cumsum' in timebaseexpr) and ('np.nan' in timebaseexpr) and dic['version'] < 105:
        print('newload: !!! kludging a fixup of nans in a timebase with cumsum !!')
        if "*2e-06" not in timebaseexpr:
            raise LookupError('*2e-06 expected in timebaseexpr:\n' + timebaseexpr +
                              "\n Delete this line in save_compress.py to try a good fixup")
            # the block below is reached only if the raise above is deleted
            dt_embedded = timebaseexpr.split("'rawtimebase'])*")[-1]
            try:
                testing = float(dt_embedded)
            except Exception as reason:
                raise ValueError('newload: failed to find a factor in cumsum ',
                                 str(reason))
            timebaseexpr = timebaseexpr.replace("timebase=", "temp=").replace(
                '*' + dt_embedded, "\ntimebase=temp*" + dt_embedded)
        else:
            timebaseexpr = timebaseexpr.replace("timebase=", "temp=").replace(
                "*2e-06", "\ntimebase=temp*2e-06")
        timebaseexpr = timebaseexpr.replace("== dic['rawtimebase']", "== temp")
    # check that one doesn't sneak through - the old fix used temp, the new (106) one is detectable by iinfo
    if ('cumsum' in timebaseexpr) and ('np.nan' in timebaseexpr) and not (
            ('temp' in timebaseexpr) or ('iinfo' in timebaseexpr)):
        raise ValueError('timebaseexpr has a potential issue with maxint and nans\n' +
                         timebaseexpr)
    if dic['version'] <= 104 and timebaseexpr.startswith('timebase=0+'):
        # bdb103 - needed for 104 too
        timebaseexpr = timebaseexpr.replace('timebase=0+', 'timebase=0.+')
    exec(timebaseexpr)
    retdic = {"signal": signal,
              "timebase": timebase,
              "parent_element": dic['parent_element']}
    if not dic.allow_pickle:  # should be able to remove this, just a backstop
        pyfusion.utils.warn('resetting allow_pickle')
        dic.allow_pickle = True
    if 'params' in dic:
        retdic.update({"params": dic['params'].tolist()})
    if (len(signal) > 5e5) and (verbose > 0):  # note: the message suggests this test may have been meant as verbose == 0
        print('Need to call with verbose>0 to see type of large data files')
    else:
        if 'rawsignal' in dic:
            retdic.update(dict(signaltype=type(dic['rawsignal'][0])))
        if 'rawtimebase' in dic:
            retdic.update(dict(timebasetype=type(dic['rawtimebase'][0])))
    return retdic
def extract_features(feature_extractor, dataset, batch_size, loader_params,
                     average=True, num_gpus=1, average_num=None,
                     preemption_protection=False, is_leader=True):
    sampler = torch.utils.data.distributed.DistributedSampler(dataset, shuffle=False) \
        if num_gpus > 1 else None
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=False, sampler=sampler,
                                         num_workers=loader_params.num_workers,
                                         pin_memory=loader_params.pin_memory,
                                         drop_last=False)
    features = None
    count = 0
    starting_iter = -1
    if preemption_protection and os.path.exists('feature_extraction.tmp.npz'):
        data = np.load('feature_extraction.tmp.npz')  # was np.loadz, which does not exist
        features = torch.Tensor(data['features']).cuda()
        count = data['count']
        starting_iter = data['curr_iter']
    for curr_iter, (inputs, labels) in enumerate(loader):
        if preemption_protection and curr_iter <= starting_iter:
            continue
        inputs, labels = inputs.cuda(), labels.cuda(non_blocking=True)
        curr_features = feature_extractor.extract(inputs)
        if average and average_num is None:
            # running sum over the whole dataset, normalized at the end
            curr_features = torch.sum(curr_features, dim=0)
            if num_gpus > 1:
                torch.distributed.all_reduce(curr_features)
            features = (features + curr_features.detach().cpu()) \
                if features is not None else curr_features.detach().cpu()
        elif average:
            # average in groups of average_num, wrapping around the buffer
            num_features = len(dataset) // average_num
            if num_gpus > 1:
                curr_features = distributed_gather_features(curr_features, batch_size, num_gpus)
            if features is None:
                features = torch.zeros(num_features, curr_features.size(-1))
            if count + curr_features.size(0) > num_features:
                remainder = count + curr_features.size(0) - num_features
                features[count:, :] += curr_features[:num_features - count, :].detach().cpu()
                offset = 0
                while remainder > num_features:
                    features += curr_features[offset + num_features - count:
                                              2 * num_features - count + offset].detach().cpu()
                    offset += num_features
                    remainder -= num_features
                features[:remainder, :] += curr_features[offset + num_features - count:, :].detach().cpu()
                count = remainder
            else:
                features[count:count + curr_features.size(0), :] += curr_features.detach().cpu()
                count += curr_features.size(0)
                count = count % num_features
        else:
            # no averaging: store one feature row per sample
            if num_gpus > 1:
                curr_features = distributed_gather_features(curr_features, batch_size, num_gpus)
            if features is None:
                features = torch.zeros(len(dataset), curr_features.size(-1))
            features[count:count + curr_features.size(0), :] = curr_features.detach().cpu()
            count += curr_features.size(0)
        if preemption_protection and curr_iter % 5000 == 0 and is_leader:
            np.savez('feature_extraction.tmp.npz',
                     features=features.detach().cpu().numpy(),
                     count=count, curr_iter=curr_iter)
    if average and average_num is None:
        features /= len(dataset)
    elif average:
        features /= average_num
    return features.detach().cpu().numpy()
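# Usage sketch for extract_features(): only the attributes actually read from
# loader_params above (num_workers, pin_memory) are assumed, so a plain
# namespace suffices; extractor and dataset are placeholders.
from types import SimpleNamespace

def _extract_demo(extractor, dataset):
    loader_params = SimpleNamespace(num_workers=4, pin_memory=True)
    return extract_features(extractor, dataset, batch_size=256,
                            loader_params=loader_params, average=False)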
parser = argparse.ArgumentParser(
    description='train an event2vec model on events in context')
parser.add_argument('eventfile', help='the file containing the events')
parser.add_argument('--dimension', type=int, default=100)
parser.add_argument('--relations', type=int, default=4)
parser.add_argument('--window', type=int, default=2)
parser.add_argument('--batch_size', type=int, default=100)
parser.add_argument('--embeddings')
parser.add_argument('--size', type=int, default=10000)  # referenced below but missing from the original parser; default is an assumption
parser.add_argument('--num_epochs', type=int, default=10)  # likewise missing; default is an assumption
args = parser.parse_args()

ec = EventContextData(args.eventfile)
if args.embeddings:
    embeddings = np.load(args.embeddings)  # was np.loadz, which does not exist
    size = embeddings.shape[0]
else:
    embeddings = None
    size = args.size
er = EventContextRNN(args.dimension, size, args.relations, embeddings=embeddings)
eventDistribution = ec.eventProbs
for i in range(args.num_epochs):
    print('epoch {}'.format(i))
    g = ec.iterEventContext(args.window, shuffle=True)
gen_sen = []
for x in gen_sentences:
    gen_sen.append(x[0])
    # gen_sen.append(numpy.zeros(30).tolist())
print(len(gen_sen))
print(len(orig_sen))
gen_s = pad_list(gen_sen)
print(gen_s.shape)
# 5000 gen
# 2144 orig
# note: these reloads overwrite the padded arrays computed just above
orig_s = np.load('orig_s.npz')  # was np.loadz, which does not exist
gen_s = np.load('gen_s.npz')
print("compiling")
d = discriminator(number_words=30000, num_hidden=400, seq_length=seq_length, mb_size=64)
print("training started")
for i in range(0, 20):
    u = random.uniform(0, 1)
    indexGen = random.randint(0, 200 // 64)   # // keeps the bound an int in python 3
    indexOrig = random.randint(0, 200 // 64)
def main():
    #import sklearn.linear_model as lm
    datasets = get_dataset.all_names()
    indx = textmenu(datasets)
    if indx is None:
        return
    dataset = datasets[indx]
    x_tra, y_tra, x_val, y_val = get_dataset.get_dataset(dataset)
    d = np.load('/data/data/mnist.npz')
    X = d['X']
    Y = d['Y'].ravel()
    Xtest = d['Xtest']
    Ytest = d['Ytest'].ravel()
    print('data loaded')
    filter_size = [5, 5, 1, 200]
    stride = 2
    # sample patches to determine median of patch distance.
    # sample filters (W, B) to create RBF
    # apply filters
    # sample patches to determine patch mean.
    # PCA patches.

    # PCA first
    n_pca_dim = 50
    X_m = np.mean(X, axis=0)         # mean
    X_zm = X - X_m                   # X with zero mean
    X_cov = X_zm.T.dot(X_zm)         # X covariance
    eigval, eigvec = la.eig(X_cov)   # note: la.eig does not sort; true PCA would order by descending eigval
    eigvec = eigvec[:, :n_pca_dim]   # keep the dominant 50 dimensions
    Xp = X.dot(eigvec)               # projections of X, Xtest onto these 50 dims
    Xtestp = Xtest.dot(eigvec)

    # Compute kernel step size s (median of dist among points)
    n_trials = int(Xp.shape[0]**1.5)
    I = random.randint(0, Xp.shape[0], n_trials)
    deltI = random.randint(1, Xp.shape[0], n_trials)
    J = (I + deltI) % X.shape[0]
    dists = sorted(map(lambda i: la.norm(Xp[I[i], :] - Xp[J[i], :]), range(n_trials)))
    s = dists[n_trials // 2]  # // keeps the median index an int in python 3

    # generate rbf params
    n_rbf = 4000
    W = random.randn(Xp.shape[1], n_rbf) / s / np.sqrt(2)
    B = random.uniform(0, 2 * np.pi, n_rbf)
    #Xf = np.cos(Xp.dot(W)+ B)
    #Xtestf = np.cos(Xtestp.dot(W)+B)
    np.savez('mnist_pca_rbf_param.npz', P=eigvec, W=W, B=B)
    np.savez('hw2_mnist.npz', X=X, Y=Y, Xtest=Xtest, Ytest=Ytest, P=eigvec, W=W, B=B)
    d2 = np.load('hw2_mnist.npz')  # was np.loadz, which does not exist
    scipy.io.savemat('hw2_mnist.mat', dict(d2))  # savemat expects a real dict
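# Spelled-out version (a sketch) of the commented-out random-Fourier-feature
# map above: projecting onto the random directions W, shifting by the random
# phases B, and taking cos approximates an RBF kernel with bandwidth set by s.
def rbf_features(Xp, W, B):
    import numpy as np
    return np.cos(Xp.dot(W) + B)  # shape (n_samples, n_rbf)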