def load_sick_data():
    """Attempt to load sick data from binary, otherwise fall back to txt."""
    try:
        if config.DEBUG:
            stdout.write('loading sick from archives.. ')
        sick_data = []
        # s_load streams one element at a time from the pickle file;
        # open in binary mode to match the 'wb' dump below
        for element in sPickle.s_load(open('sick.pickle', 'rb')):
            sick_data.append(element)
    except IOError:
        if config.DEBUG:
            stdout.write(' error - loading from txt-files..')
        sick_data = []
        for line in open(os.path.join(config.working_path, 'SICK_all.txt')):
            if line.split()[0] != 'pair_ID':  # skip the header row
                sick_data.append(load_sick_data_from_folder(line.split()[0]))
        # Sort according to SICK_all.txt
        with open('sick.pickle', 'wb') as out_f:
            sPickle.s_dump(sick_data, out_f)
    if config.DEBUG:
        stdout.write(' done!\n')
    return sick_data
def test_empty(self):
    with open(self.testfn, 'wb') as f:
        sPickle.s_dump([], f)
    with open(self.testfn, 'rb') as f:
        for elt in sPickle.s_load(f):
            self.fail('found element for stream that should be empty: ' + str(elt))
def store(self, dataset, datasetName, use_sPickle=True):
    name = self.extract_last_component(datasetName)
    logger.info("Storing dataset: {}".format(name))
    # logger.info(dataset)
    if use_sPickle:
        sPickle.s_dump(dataset, open(self.dataset_location + "/" + name, "wb"))
    else:
        pickle.dump(dataset, open(self.dataset_location + "/" + name, "wb"))
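The original shows only the write path. A minimal matching loader sketch, assuming the same dataset_location attribute and that the dataset was streamed with s_dump (the method name load is hypothetical):

def load(self, datasetName, use_sPickle=True):
    # Hypothetical counterpart to store(); not part of the original class.
    name = self.extract_last_component(datasetName)
    logger.info("Loading dataset: {}".format(name))
    with open(self.dataset_location + "/" + name, "rb") as f:
        if use_sPickle:
            # s_load is a generator over the streamed elements; materialize it
            return list(sPickle.s_load(f))
        return pickle.load(f)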
import os

import numpy as np
import librosa  # was misspelled 'librasa'
import sPickle

source_path = "/root/data/tzanetakis/ver9.0/"
dest_path = "/root/data/tzanetakis/ver9.1/"


def wave2mel(sample):
    # log-amplitude mel spectrogram, flattened to a 1-D feature vector
    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    longgrid = logam(melgram(y=sample, sr=22050, n_fft=1024, n_mels=128),
                     ref_power=1.0)
    return longgrid.flatten()


for root, dirs, files in os.walk(source_path):
    for name in files:
        if ".p" in name:
            arr = sPickle.s_load(open(root + '/' + name, 'rb'))
            dest = []
            for a in arr:
                b = wave2mel(a)
                dest.append(b)
            dest = np.asarray(dest)
            print name, dest.shape
            # 'wb' added: s_dump writes binary pickle data
            sPickle.s_dump(dest, open(dest_path + name, 'wb'))
def writefigurepositions(fname, positions):
    with open(fname, 'wb') as f:
        sPickle.s_dump(positions, f)
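No reader appears alongside this writer; a minimal sketch, assuming the file was written with s_dump as above (the name readfigurepositions is hypothetical):

def readfigurepositions(fname):
    # Hypothetical counterpart: rebuild the positions from the streamed pickle.
    with open(fname, 'rb') as f:
        return list(sPickle.s_load(f))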
print(root, gid)
for name in files:
    # CHANGE HERE FOR FILE TYPE
    if 'wav' in name or 'au' in name:
        parseAudio(gid, sid, root + '/' + name)
        sid += 1
if sid != 0:
    gid += 1

# Normalize the data
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)
x_holdout = np.asarray(x_holdout)
y_holdout = np.asarray(y_holdout)

print("x_train: " + str(x_train.shape))
print("y_train: " + str(y_train.shape))
print("x_test: " + str(x_test.shape))
print("y_test: " + str(y_test.shape))
print("x_holdout: " + str(x_holdout.shape))
print("y_holdout: " + str(y_holdout.shape))

sPickle.s_dump(x_train, open(dest_path + 'x_train_mel.p', 'wb'))
sPickle.s_dump(y_train, open(dest_path + 'y_train_mel.p', 'wb'))
sPickle.s_dump(x_test, open(dest_path + 'x_test_mel.p', 'wb'))
sPickle.s_dump(y_test, open(dest_path + 'y_test_mel.p', 'wb'))
sPickle.s_dump(x_holdout, open(dest_path + 'x_holdout_mel.p', 'wb'))
sPickle.s_dump(y_holdout, open(dest_path + 'y_holdout_mel.p', 'wb'))
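s_dump iterates its first argument, so each 2-D array above is streamed row by row; reading a split back means collecting the rows and re-stacking them. A minimal sketch, assuming the same dest_path (the helper load_split is hypothetical):

def load_split(path):
    # Collect the streamed rows back into a single numpy array.
    with open(path, 'rb') as f:
        return np.asarray(list(sPickle.s_load(f)))

x_train = load_split(dest_path + 'x_train_mel.p')
y_train = load_split(dest_path + 'y_train_mel.p')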
import sPickle

# Pattern 1: dump a whole in-memory iterable, then stream it back.
lst = range(101)
sPickle.s_dump(lst, open('lst.spkl', 'w'))

sum = 0
for element in sPickle.s_load(open('lst.spkl')):
    sum += element
print sum
print

def process_data(s):
    return len(s)

# Pattern 2: dump a generator expression, so nothing is held in memory.
sPickle.s_dump((process_data(line.split(',')[0]) for line in open('input.csv')),
               open('lst1.spkl', 'w'))
for elt in sPickle.s_load(open('lst1.spkl')):
    print elt
print

# Pattern 3: dump one element at a time with s_dump_elt.
f = open('lst2.spkl', 'w')
for line in open('input.csv'):
    sPickle.s_dump_elt(process_data(line.split(',')[0]), f)
f.close()
for elt in sPickle.s_load(open('lst2.spkl')):
    print elt
print

l = range(10)
for dataarray in dataarrays:
    featurearray = []
    for x in xrange(len(dataarray)):
        if feattypedict[featnamearray[x]] == 1:
            # one-hot encode this categorical feature
            vector = [0 for y in featdict[featnamearray[x]].values()]
            vector[featdict[featnamearray[x]][dataarray[x]]] = 1
            featurearray = featurearray + vector
        else:
            pass
    featurearrays.append(featurearray)

numberfeatures = numpy.array(numberfeatures, dtype=numpy.float32)
mean = numberfeatures.mean(axis=0)
std = numberfeatures.std(axis=0)
numberfeatures = (numberfeatures - mean) / std

vectorfeatures = numpy.array(featurearrays, dtype=numpy.float32)
assert len(numberfeatures) == len(vectorfeatures)
# combine the normalized numeric features with the one-hot vectors;
# the original stacked vectorfeatures with itself, which appears to be a typo
allfeatures = numpy.hstack((numberfeatures, vectorfeatures))
print len(featurearrays)

output = open('allfeat.pkl', 'wb')
sPickle.s_dump(mean, output)
sPickle.s_dump(std, output)
sPickle.s_dump(allfeatures, output)
output.close()
print "end"
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Dylan @ 2016-04-24 20:23:07
import os

import numpy
import sPickle

labels = []
NBYTE = (35 + 1) * 3 + 1  # values per frame: mgc/lf0 (3 windows) + uv flag

for file in [os.path.join("cmp1", f) for f in sorted(os.listdir("cmp1/"))]:
    labelarray = numpy.fromfile(file, dtype=numpy.float32)
    assert len(labelarray) % NBYTE == 0
    nFrame = len(labelarray) / NBYTE
    labels += list(labelarray.reshape(nFrame, NBYTE))
    # print nFrame

labels = numpy.array(labels)
l_mean = labels.mean(axis=0)
l_std = labels.std(axis=0)
print len(labels)
labels = (labels - l_mean) / l_std

with open("labels.pkl", "wb") as fp:
    sPickle.s_dump(l_mean, fp)
    sPickle.s_dump(l_std, fp)
    sPickle.s_dump(labels, fp)
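Because s_dump pickles each element of its first argument, the three dumps above concatenate into one stream: NBYTE mean values, NBYTE std values, then one row per frame. A minimal reader sketch under that assumption (not part of the original script):

with open("labels.pkl", "rb") as fp:
    stream = list(sPickle.s_load(fp))           # every streamed element, in dump order
l_mean = numpy.array(stream[:NBYTE])            # first dump: mean, value by value
l_std = numpy.array(stream[NBYTE:2 * NBYTE])    # second dump: std, value by value
labels = numpy.array(stream[2 * NBYTE:])        # third dump: one row per frame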
        x_train.extend(chunks)  # extend() instead of throwaway list comprehensions
        y_train.extend([oneLabel] * len(chunks))
        print "parsed", name, "as training data"
    else:
        x_test.extend(chunks)
        y_test.extend([oneLabel] * len(chunks))
        print "parsed", name, "as testing data"
    sid += 1

x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= np.amax(x_train)
x_test /= np.amax(x_test)

print "done parsing and normalizing, final shapes are"
print x_train.shape
print y_train.shape
print x_test.shape
print y_test.shape

sPickle.s_dump(x_train, open(destPath + 'x_train.p', 'wb'))
sPickle.s_dump(y_train, open(destPath + 'y_train.p', 'wb'))
sPickle.s_dump(x_test, open(destPath + 'x_test.p', 'wb'))
sPickle.s_dump(y_test, open(destPath + 'y_test.p', 'wb'))
# first create the binary matrix for victims/attackers
print 'Computing the binary matrix for victim/attackers...'
binary_data = bm.compute_binary_matrix(uni_target_ips, train_set,
                                       train_w_length, offset, i)

# then run the CA algorithm
if not ca_dict_computed:
    # compute the densities - i.e. run CA algorithm
    print 'Running CA clustering algorithm - computing density matrix...'
    ca_densities = ca.compute_density_matrix(uni_target_ips, uni_attacker_ips,
                                             binary_data, train_w_length, i)
    # stream the dict out as (key, value) pairs
    sPickle.s_dump(ca_densities.iteritems(),
                   open("densities" + str(i) + ".spkl", "w"))
else:
    # load the computed density matrix for the window:
    # s_load yields the (key, value) pairs back, so dict() rebuilds it
    print 'Loading CA density matrix from file...'
    ca_densities = dict(sPickle.s_load(open("densities" + str(i) + ".spkl")))

# compute the denominator needed for the similarities and store it in a dictionary
print 'Computing the denominator for similarities...'
sim_denom = sim.compute_denominator(train_set, uni_target_ips,
                                    train_w_length, offset, i, start_day)
# find similarities between victims
def _dump(self, iterable):
    with open(self.testfn, 'wb') as f:
        sPickle.s_dump(iterable, f)
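A matching read helper is not shown; a minimal sketch mirroring _dump (the name _load is hypothetical):

def _load(self):
    # Hypothetical counterpart to _dump(): collect the streamed elements into a list.
    with open(self.testfn, 'rb') as f:
        return list(sPickle.s_load(f))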