def load_sick_data():
    """
    Load the SICK data, preferring the cached archive over the txt files.

    First tries to read every element from ``sick.pickle`` in the current
    working directory.  If that raises ``IOError`` (for example because the
    archive does not exist yet), the data is rebuilt instead: the first
    column of every non-header line of ``SICK_all.txt`` (located under
    ``config.working_path``) is passed to ``load_sick_data_from_folder``,
    and the resulting list is written to ``sick.pickle`` for later calls.

    Progress messages are written to ``stdout`` when ``config.DEBUG`` is set.

    Returns:
        list: the loaded elements; when rebuilt, they follow the line order
        of ``SICK_all.txt``.
    """
    if config.DEBUG:
        stdout.write('loading sick from archives.. ')

    try:
        # 'rb' matches the 'wb' mode the archive is written with below, and
        # the with-block closes the handle even if loading stops half-way.
        with open('sick.pickle', 'rb') as in_f:
            sick_data = list(sPickle.s_load(in_f))

    except IOError:
        if config.DEBUG:
            stdout.write(' error - loading from txt-files..')

        sick_data = []
        with open(os.path.join(config.working_path, 'SICK_all.txt')) as txt_f:
            for line in txt_f:
                fields = line.split()
                # Skip blank lines (indexing them would raise IndexError)
                # and the header row, whose first column is 'pair_ID'.
                if not fields or fields[0] == 'pair_ID':
                    continue
                sick_data.append(load_sick_data_from_folder(fields[0]))

        # Cache the rebuilt list (still in SICK_all.txt order) as the archive.
        with open('sick.pickle', 'wb') as out_f:
            sPickle.s_dump(sick_data, out_f)

    if config.DEBUG:
        stdout.write(' done!\n')

    return sick_data
def load_sick_data():
    """
    Load the SICK data, preferring the cached archive over the txt files.

    First tries to read every element from ``sick.pickle`` in the current
    working directory.  If that raises ``IOError`` (for example because the
    archive does not exist yet), the data is rebuilt instead: the first
    column of every non-header line of ``SICK_all.txt`` (located under
    ``config.working_path``) is passed to ``load_sick_data_from_folder``,
    and the resulting list is written to ``sick.pickle`` for later calls.

    Progress messages are written to ``stdout`` when ``config.DEBUG`` is set.

    Returns:
        list: the loaded elements; when rebuilt, they follow the line order
        of ``SICK_all.txt``.
    """
    if config.DEBUG:
        stdout.write('loading sick from archives.. ')

    try:
        # 'rb' matches the 'wb' mode the archive is written with below, and
        # the with-block closes the handle even if loading stops half-way.
        with open('sick.pickle', 'rb') as in_f:
            sick_data = list(sPickle.s_load(in_f))

    except IOError:
        if config.DEBUG:
            stdout.write(' error - loading from txt-files..')

        sick_data = []
        with open(os.path.join(config.working_path, 'SICK_all.txt')) as txt_f:
            for line in txt_f:
                fields = line.split()
                # Skip blank lines (indexing them would raise IndexError)
                # and the header row, whose first column is 'pair_ID'.
                if not fields or fields[0] == 'pair_ID':
                    continue
                sick_data.append(load_sick_data_from_folder(fields[0]))

        # Cache the rebuilt list (still in SICK_all.txt order) as the archive.
        with open('sick.pickle', 'wb') as out_f:
            sPickle.s_dump(sick_data, out_f)

    if config.DEBUG:
        stdout.write(' done!\n')

    return sick_data
 def test_empty(self):
     """Dump an empty list to ``self.testfn`` and fail if loading yields anything."""
     with open(self.testfn, 'wb') as sink:
         sPickle.s_dump([], sink)

     with open(self.testfn, 'rb') as source:
         for unexpected in sPickle.s_load(source):
             message = ('found element for stream that should be empty: ' +
                        str(unexpected))
             self.fail(message)
Beispiel #4
0
 def store(self, dataset, datasetName, use_sPickle=True):
     """
     Serialize ``dataset`` to a file inside ``self.dataset_location``.

     The file name is whatever ``self.extract_last_component(datasetName)``
     returns; the chosen name is logged through ``logger.info``.

     Args:
         dataset: object to serialize.
         datasetName: name handed to ``self.extract_last_component`` to
             derive the file name.
         use_sPickle: when true (the default) the data is written with
             ``sPickle.s_dump``; otherwise with ``pickle.dump``.
     """
     name = self.extract_last_component(datasetName)
     logger.info("Storing dataset: {}".format(name))

     dump = sPickle.s_dump if use_sPickle else pickle.dump
     # The with-block flushes and closes the file even if serialization
     # fails; previously the handle was opened inline and never closed.
     with open(self.dataset_location + "/" + name, "wb") as out_f:
         dump(dataset, out_f)
Beispiel #5
0
import numpy as np
import librasa
import sPickle

source_path = "/root/data/tzanetakis/ver9.0/"
dest_path = "/root/data/tzanetakis/ver9.1/"


def wave2mel(sample):
    """
    Return the flattened log-amplitude mel spectrogram of ``sample``.

    The spectrogram is computed by ``librosa.feature.melspectrogram`` with
    ``sr=22050``, ``n_fft=1024`` and ``n_mels=128``, converted with
    ``librosa.logamplitude`` (``ref_power=1.0``) and flattened.
    """
    mel_spectrogram = librosa.feature.melspectrogram(
        y=sample, sr=22050, n_fft=1024, n_mels=128)
    log_mel = librosa.logamplitude(mel_spectrogram, ref_power=1.0)
    return log_mel.flatten()


# Walk source_path, convert every sample of each matching archive with
# wave2mel, and write the converted array to dest_path under the same name.
for root, dirs, files in os.walk(source_path):
    for name in files:
        # NOTE: substring test -- any file name containing ".p" is processed.
        if ".p" in name:
            with open(root + '/' + name, 'rb') as in_f:
                dest = np.asarray([wave2mel(a) for a in sPickle.s_load(in_f)])
            # Same output as "print name, dest.shape", on Python 2 and 3.
            print("%s %s" % (name, dest.shape))
            # The destination must be opened for writing: the previous
            # open(dest_path + name) used the default read mode, so the dump
            # could not write anything.  The with-block also closes the file.
            with open(dest_path + name, 'wb') as out_f:
                sPickle.s_dump(dest, out_f)
# -*- coding: utf-8 -*-
# Dylan @ 2016-04-24 20:23:07

import os 
import numpy
import sPickle

# Read every file in cmp1/ as float32 frames, standardize the frames
# column-wise and store mean, std and the standardized frames in labels.pkl.
labels = []
# Number of float32 values per frame: mgc/lf0(3 windows)/uvflag
NBYTE = (35 + 1) * 3 + 1
for fname in sorted(os.listdir("cmp1/")):
    labelarray = numpy.fromfile(os.path.join("cmp1", fname),
                                dtype=numpy.float32)
    assert len(labelarray) % NBYTE == 0
    # Floor division keeps nFrame an integer on Python 3 as well; reshape
    # rejects a float dimension.
    nFrame = len(labelarray) // NBYTE
    labels += list(labelarray.reshape(nFrame, NBYTE))

labels = numpy.array(labels)
l_mean = labels.mean(axis=0)
l_std = labels.std(axis=0)

print(len(labels))
# NOTE: a column whose std is 0 is divided by zero here.
labels = (labels - l_mean) / l_std

# The three dumps are written back to back into the same file.
with open("labels.pkl", "wb") as fp:
    sPickle.s_dump(l_mean, fp)
    sPickle.s_dump(l_std, fp)
    sPickle.s_dump(labels, fp)



def writefigurepositions(fname, positions):
    """
    Write ``positions`` to the file ``fname`` with ``sPickle.s_dump``.

    Args:
        fname: path of the file to create or overwrite (opened in 'wb' mode).
        positions: iterable handed to ``sPickle.s_dump``.
    """
    # The with-block closes the file even when the dump raises; the explicit
    # close() it replaces was skipped in that case.
    with open(fname, 'wb') as f:
        sPickle.s_dump(positions, f)
        # Excerpt of a larger loop; root, files, gid and sid are set outside
        # the visible lines.
        print(root, gid)
        for name in files:
            # CHANGE HERE FOR FILE TYPE
            # NOTE(review): substring test -- any name containing 'wav' or
            # 'au' matches, not only names with that extension.
            if 'wav' in name or 'au' in name:
                parseAudio(gid, sid, root + '/' + name)
                sid += 1
        # gid only advances when sid is non-zero.
        # NOTE(review): whether sid is reset between directories is decided
        # outside this excerpt -- confirm.
        if sid != 0:
            gid += 1

# Convert the collected splits to numpy arrays.  No value scaling happens
# here, so this step is a conversion rather than a normalization.
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)
x_holdout = np.asarray(x_holdout)
y_holdout = np.asarray(y_holdout)

_splits = (
    ('x_train', x_train),
    ('y_train', y_train),
    ('x_test', x_test),
    ('y_test', y_test),
    ('x_holdout', x_holdout),
    ('y_holdout', y_holdout),
)

# Report the shape of every split.
for _split_name, _split in _splits:
    print(_split_name + ": " + str(_split.shape))

# Write each split to dest_path as "<name>_mel.p".  The with-block closes
# every file; the inline open() calls it replaces were never closed.
for _split_name, _split in _splits:
    with open(dest_path + _split_name + '_mel.p', 'wb') as out_f:
        sPickle.s_dump(_split, out_f)
import sPickle

# Demo: dump a list element by element, then stream it back and add it up.
lst = range(101)
# Close the file before reading it back so the dumped data is flushed
# without relying on garbage collection of the file object.
with open('lst.spkl', 'w') as out_f:
    sPickle.s_dump(lst, out_f)

# Named "total" so the builtin sum() is not shadowed.
total = 0
with open('lst.spkl') as in_f:
    for element in sPickle.s_load(in_f):
        total += element
print(total)
print("")

def process_data(s):
    """Return the length of ``s`` as reported by ``len``."""
    size = len(s)
    return size

# Demo 1: dump a generator, so only one processed element is held at a time.
# Both files are closed by the with-block before the result is read back.
with open('input.csv') as in_f, open('lst1.spkl', 'w') as out_f:
    sPickle.s_dump((process_data(line.split(',')[0]) for line in in_f),
                   out_f)

with open('lst1.spkl') as in_f:
    for elt in sPickle.s_load(in_f):
        print(elt)
print("")

# Demo 2: the same data, dumped one element at a time with s_dump_elt.
with open('input.csv') as in_f, open('lst2.spkl', 'w') as f:
    for line in in_f:
        sPickle.s_dump_elt(process_data(line.split(',')[0]), f)

with open('lst2.spkl') as in_f:
    for elt in sPickle.s_load(in_f):
        print(elt)
print("")

l = range(10)
Beispiel #10
0
        # Excerpt of a larger function; dataarray, featurearray,
        # featurearrays, numberfeatures and the feat* lookups are set up
        # outside the visible lines.
        # Append a one-hot vector for every field whose feature type is 1.
        for x in xrange(len(dataarray)):
            if feattypedict[featnamearray[x]]==1:
                # Zero vector with one slot per entry of this feature's dict.
                vector=[0 for y in featdict[featnamearray[x]].values()]
                # featdict maps the observed value to the slot that is set.
                vector[featdict[featnamearray[x]][dataarray[x]]]=1
                featurearray=featurearray+vector
            else :
                # Fields of any other type add nothing to featurearray here.
                pass
        featurearrays.append(featurearray)

    # Standardize numberfeatures column-wise with its own mean and std.
    # NOTE(review): a column whose std is 0 is divided by zero below.
    numberfeatures=numpy.array(numberfeatures,dtype=numpy.float32)
    mean=numberfeatures.mean(axis=0)
    std=numberfeatures.std(axis=0)

    numberfeatures=(numberfeatures-mean)/std
    vectorfeatures=numpy.array(featurearrays,dtype=numpy.float32)

    assert len(numberfeatures)==len(vectorfeatures) 


    # NOTE(review): this stacks vectorfeatures with itself; the standardized
    # numberfeatures is length-checked above but never used afterwards.
    # Presumably hstack((numberfeatures, vectorfeatures)) was intended --
    # confirm before relying on allfeat.pkl.
    allfeatures=numpy.hstack((vectorfeatures,vectorfeatures))

    print len(featurearrays)
    output =open('allfeat.pkl','wb')
    
    # mean, std and the feature matrix are dumped back to back into one file.
    sPickle.s_dump(mean,output)
    sPickle.s_dump(std,output)
    sPickle.s_dump(allfeatures,output)
    output.close()
    print "end"

Beispiel #11
0
    # Excerpt of a larger function; dataarrays, featurearrays, numberfeatures
    # and the feat* lookups are set up outside the visible lines.
    # Build one feature list per data row from its type-1 fields.
    for dataarray in dataarrays:
        featurearray = []
        for x in xrange(len(dataarray)):
            if feattypedict[featnamearray[x]] == 1:
                # Zero vector with one slot per entry of this feature's dict.
                vector = [0 for y in featdict[featnamearray[x]].values()]
                # featdict maps the observed value to the slot that is set.
                vector[featdict[featnamearray[x]][dataarray[x]]] = 1
                featurearray = featurearray + vector
            else:
                # Fields of any other type add nothing to featurearray here.
                pass
        featurearrays.append(featurearray)

    # Standardize numberfeatures column-wise with its own mean and std.
    # NOTE(review): a column whose std is 0 is divided by zero below.
    numberfeatures = numpy.array(numberfeatures, dtype=numpy.float32)
    mean = numberfeatures.mean(axis=0)
    std = numberfeatures.std(axis=0)

    numberfeatures = (numberfeatures - mean) / std
    vectorfeatures = numpy.array(featurearrays, dtype=numpy.float32)

    assert len(numberfeatures) == len(vectorfeatures)

    # NOTE(review): this stacks vectorfeatures with itself; the standardized
    # numberfeatures is length-checked above but never used afterwards.
    # Presumably hstack((numberfeatures, vectorfeatures)) was intended --
    # confirm before relying on allfeat.pkl.
    allfeatures = numpy.hstack((vectorfeatures, vectorfeatures))

    print len(featurearrays)
    output = open('allfeat.pkl', 'wb')

    # mean, std and the feature matrix are dumped back to back into one file.
    sPickle.s_dump(mean, output)
    sPickle.s_dump(std, output)
    sPickle.s_dump(allfeatures, output)
    output.close()
    print "end"
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Dylan @ 2016-04-24 20:23:07

import os
import numpy
import sPickle

# Read every file in cmp1/ as float32 frames, standardize the frames
# column-wise and store mean, std and the standardized frames in labels.pkl.
labels = []
# Number of float32 values per frame: mgc/lf0(3 windows)/uvflag
NBYTE = (35 + 1) * 3 + 1
for fname in sorted(os.listdir("cmp1/")):
    labelarray = numpy.fromfile(os.path.join("cmp1", fname),
                                dtype=numpy.float32)
    assert len(labelarray) % NBYTE == 0
    # Floor division keeps nFrame an integer on Python 3 as well; reshape
    # rejects a float dimension.
    nFrame = len(labelarray) // NBYTE
    labels += list(labelarray.reshape(nFrame, NBYTE))

labels = numpy.array(labels)
l_mean = labels.mean(axis=0)
l_std = labels.std(axis=0)

print(len(labels))
# NOTE: a column whose std is 0 is divided by zero here.
labels = (labels - l_mean) / l_std

# The three dumps are written back to back into the same file.
with open("labels.pkl", "wb") as fp:
    sPickle.s_dump(l_mean, fp)
    sPickle.s_dump(l_std, fp)
    sPickle.s_dump(labels, fp)
                    # Excerpt of a larger loop; the condition that selects
                    # this branch lies outside the visible lines.
                    # Add every chunk to the training data, with one copy of
                    # oneLabel per chunk.  The list comprehensions are used
                    # only for their append side effect.
                    [x_train.append(x) for x in chunks]
                    [y_train.append(x) for x in [oneLabel] * len(chunks)]
                    print "parsed", name, "as training data"
                else:
                    # Same bookkeeping, but into the test data.
                    [x_test.append(x) for x in chunks]
                    [y_test.append(x) for x in [oneLabel] * len(chunks)]
                    print "parsed", name, "as testing data"
                sid += 1

# Convert the collected samples and labels to numpy arrays.
x_train = np.asarray(x_train)
y_train = np.asarray(y_train)
x_test = np.asarray(x_test)
y_test = np.asarray(y_test)

# Scale the inputs: cast to float32, then divide each split by its own
# maximum value (training and test data use different divisors).
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= np.amax(x_train)
x_test /= np.amax(x_test)

print("done parsing and normalizing, final shapes are")

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

# Write each array to destPath.  The with-blocks close the files; the inline
# open() calls they replace were never closed.
with open(destPath + 'x_train.p', 'wb') as out_f:
    sPickle.s_dump(x_train, out_f)
with open(destPath + 'y_train.p', 'wb') as out_f:
    sPickle.s_dump(y_train, out_f)
with open(destPath + 'x_test.p', 'wb') as out_f:
    sPickle.s_dump(x_test, out_f)
with open(destPath + 'y_test.p', 'wb') as out_f:
    sPickle.s_dump(y_test, out_f)
Beispiel #14
0
    # Excerpt of a larger loop or function; i, offset, train_set and the
    # other inputs are set outside the visible lines.
    # first create the binary matrix for victims/attackers
    print 'Computing the binary matrix for victim/attackers...'
    # NOTE(review): the empty dict is replaced at once by the call below.
    binary_data = dict()
    binary_data = bm.compute_binary_matrix(uni_target_ips, train_set,
                                           train_w_length, offset, i)

    # then run the CA algorithm
    # Either compute the densities and cache them in "densities<i>.spkl",
    # or rebuild the dict from that cache file.
    if ca_dict_computed == False:
        # compute the densities - i.e. run CA algorithm
        print 'Running CA clustering algorithm - computing density matrix...'
        # NOTE(review): the empty dict is replaced at once by the call below.
        ca_densities = dict()
        ca_densities = ca.compute_density_matrix(uni_target_ips,
                                                 uni_attacker_ips, binary_data,
                                                 train_w_length, i)
        # The (key, value) pairs are streamed out so the else-branch can
        # rebuild the dict with dict(...).
        # NOTE(review): the file object is never closed explicitly -- confirm
        # it is flushed before the cache file is read again.
        sPickle.s_dump(ca_densities.iteritems(),
                       open("densities" + str(i) + ".spkl", "w"))

    else:
        # load the computed density matrix for the window
        print 'Loading CA density matrix from file...'
        #ca_densities = dict()
        ca_densities = dict(
            sPickle.s_load(open("densities" + str(i) + ".spkl")))

    # compute the denominator needed for the similarities and store it in a dictionary
    print 'Computing the denominator for similarities...'
    # NOTE(review): the empty dict is replaced at once by the call below.
    sim_denom = dict()
    sim_denom = sim.compute_denominator(train_set, uni_target_ips,
                                        train_w_length, offset, i, start_day)

    # find similarities between victims
 def _dump(self, iterable):
     """Write ``iterable`` to the file ``self.testfn`` using ``sPickle.s_dump``."""
     target_path = self.testfn
     with open(target_path, 'wb') as sink:
         sPickle.s_dump(iterable, sink)