Example #1
import statistics

import numpy as np

# `datasets` below is assumed to be the course's dataset helper module
# (e.g. `from libs import datasets` in the CADL course code).

def prepare_dataset_convnet(resources_dir, filenames, winLSecs, energyBands_sr,
                            n_classes):
    # ------------- prepare dataset
    Xs = []
    ys = []

    for iString in range(0, len(filenames)):
        inputFile = resources_dir + filenames[
            iString] + '.16bit-EnergyBankFilter.txt'
        energy_bands = np.loadtxt(inputFile, skiprows=0).T
        energy_bands = (energy_bands / 120) + 1  # map dB energies in [-120, 0] to [0, 1]
        target = np.ones(energy_bands.shape[1]) * iString

        if iString == 0:
            # We want winLSecs seconds of audio in our window
            #winLSecs = 0.05
            windowSize = int((winLSecs * energyBands_sr) // 2 * 2)  # round down to an even frame count
            # And we'll move our window by windowSize/2
            hopSize = windowSize // 2
            print('windowSize', windowSize)

        # Drop the last hop so every window fits inside the recording
        n_hops = energy_bands.shape[1] // hopSize - 1
        for hop_i in range(n_hops):
            # Creating our sliding window
            frames = energy_bands[:, (hop_i * hopSize):(hop_i * hopSize + windowSize)]
            # Label the window with the median string index over its frames
            avgString = round(statistics.median(
                target[(hop_i * hopSize):(hop_i * hopSize + windowSize)]))
            # Keep only windows that lie entirely within one string
            if avgString == target[hop_i * hopSize]:
                Xs.append(frames[..., np.newaxis])
                ys.append(int(avgString))

    Xs = np.array(Xs)
    ys = np.array(ys)
    print("Xs.shape:", Xs.shape, ", Xs.shape:", ys.shape)
    ds = datasets.Dataset(Xs=Xs,
                          ys=ys,
                          split=[0.8, 0.1, 0.1],
                          one_hot=True,
                          n_classes=n_classes)

    return ds, Xs, ys, windowSize
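
# A minimal usage sketch (not from the original): the directory and
# per-string filenames are hypothetical; 0.05 s at a 240 Hz band-energy
# rate gives windowSize = 12 and hopSize = 6.
ds, Xs, ys, windowSize = prepare_dataset_convnet(
    resources_dir='./resources/',
    filenames=['stringI', 'stringII', 'stringIII', 'stringIV'],
    winLSecs=0.05,
    energyBands_sr=240,
    n_classes=4)
print(Xs.shape)  # (n_windows, n_bands, windowSize, 1)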
Example #2
import numpy as np

# `datasets`: course dataset helpers, as in Example #1.

def prepare_dataset_ffw(resources_dir, filenames, winLSecs, energyBands_sr,
                        n_classes):
    Xs = []
    ys = []

    for iString in range(0, len(filenames)):
        inputFile = resources_dir + filenames[
            iString] + '.16bit-EnergyBankFilter.txt'
        energy_bands = np.loadtxt(inputFile, skiprows=0).T
        energy_bands = (energy_bands / 120) + 1  # map dB energies in [-120, 0] to [0, 1]
        target = np.ones(energy_bands.shape[1]) * iString
        print("Preparing dataset: reading ", inputFile)

        if iString == 0:
            # We want winLSecs seconds of audio in our window
            # winLSecs = 0.05
            windowSize = int((winLSecs * energyBands_sr) // 2 * 2)  # round down to an even frame count
            # And we'll move our window by windowSize/2
            hopSize = windowSize // 2
            print('windowSize', windowSize)

        nFrames = len(target)
        for iframe in range(nFrames):
            frame = energy_bands[:, iframe]
            # frame=np.append(frame, pitch[iframe])

            Xs.append(frame)
            ys.append(int(target[iframe]))
            if iframe % 100 == 0:
                print("String:", iString, ", frame:", iframe, "/", nFrames)

    Xs = np.array(Xs)
    ys = np.array(ys)
    print(Xs.shape, ys.shape)

    # n_classes is typically 5 here: 0 --> not playing, 1-4 --> strings
    ds = datasets.Dataset(Xs=Xs,
                          ys=ys,
                          split=[0.8, 0.1, 0.1],
                          one_hot=True,
                          n_classes=n_classes)
    return ds, Xs, ys, windowSize
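
# For comparison with the convnet variant above: each example here is a
# single frame, so Xs comes out 2-D. A shape-only sketch with made-up
# sizes (40 bands, 1000 frames):
demo_bands = np.random.rand(40, 1000)
demo_frames = [demo_bands[:, i] for i in range(demo_bands.shape[1])]
print(np.array(demo_frames).shape)  # (1000, 40): one feature vector per frame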
Example #3
import matplotlib.pyplot as plt
import numpy as np
from skimage.transform import resize

# `utils` and `datasets` are the course helper modules.

#print(filenames)

# Read every filename as an RGB image
imgs = [plt.imread(fname)[..., :3] for fname in filenames]

# Crop every image to a square
imgs = [utils.imcrop_tosquare(img_i) for img_i in imgs]

# Then resize the square image to 100 x 100 pixels; mode='reflect'
imgs = [resize(img_i, (100, 100), mode='reflect') for img_i in imgs]

# Then convert the list of images to a 4d array (e.g. use np.array to convert a list to a 4d array):
Xs = np.array(imgs).astype(np.float32)
#print(Xs.shape)
assert (Xs.ndim == 4 and Xs.shape[1] <= 100 and Xs.shape[2] <= 100)
ds = datasets.Dataset(Xs)

mean_img = ds.mean()
#plt.imshow(mean_img)
#plt.show()
# If the image comes out entirely black, try it without the `astype(np.uint8)`:
# depending on your matplotlib version, images are read in as 0-255
# rather than 0-1.
std_img = ds.std()
#plt.imshow(std_img)
#plt.show()
#print(std_img.shape)
std_img = np.mean(std_img, axis=2)  # collapse RGB channels to one grayscale std image
#plt.imshow(std_img)
#plt.show()
#plt.imshow(ds.X[0])
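
# To inspect the statistics computed above, a plotting sketch (assumes the
# images were read as floats in [0, 1], as plt.imread does for PNGs):
fig, axs = plt.subplots(1, 2, figsize=(8, 4))
axs[0].imshow(mean_img)
axs[0].set_title('Mean image')
axs[1].imshow(std_img, cmap='gray')  # 2-D after the channel mean above
axs[1].set_title('Per-pixel std')
plt.show()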
Example #4
import matplotlib.pyplot as plt
import numpy as np
from skimage.transform import resize

# `get_myown_imgs`, `utils`, and `datasets` are helpers defined elsewhere
# in the original notebook.

# NOTE: `myown_img` was never defined in this excerpt; presumably it was
# loaded the same way from its own directory (`direc_img` is hypothetical).
myown_img = get_myown_imgs(direc_img)
myown_obj = get_myown_imgs(direc)

# Then resize the square images to 100 x 100 pixels
myown_img = [resize(img_i, (100, 100, 3)) for img_i in myown_img]
myown_obj = [resize(img_i, (100, 100, 3)) for img_i in myown_obj]
plt.figure(figsize=(10, 10))
plt.imshow(utils.montage(myown_img))

# Then convert the list of images to a 4d array (e.g. use np.array to convert a list to a 4d array):
Xs = np.array(myown_img).copy() * 255  # resize returns floats in [0, 1]; rescale to 0-255

print(Xs.shape)
assert(Xs.ndim == 4 and Xs.shape[1] <= 100 and Xs.shape[2] <= 100)

ds_img = datasets.Dataset(Xs)

# Then convert the list of images to a 4d array (e.g. use np.array to convert a list to a 4d array):
Xs = np.array(myown_obj).copy() * 255  # same rescaling for the object images

print(Xs.shape)
assert(Xs.ndim == 4 and Xs.shape[1] <= 100 and Xs.shape[2] <= 100)

ds_obj = datasets.Dataset(Xs)

for (X_img, y) in ds_img.train.next_batch(batch_size=25):
    print(X_img.shape)
for (X_obj, y) in ds_obj.train.next_batch(batch_size=25):
    print(X_obj.shape)

# Just to make sure that you've coded the previous two functions correctly:
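# A plausible check (an assumption, not from the original): with
# batch_size=25 and 100 x 100 RGB images, each batch should be 4-D:
assert X_img.shape == (25, 100, 100, 3)
assert X_obj.shape == (25, 100, 100, 3)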
Example #5
import datetime
import os

import numpy as np
import tensorflow as tf

# `umatlab`, `utils`, and `datasets` are project-specific helper modules.

def main(winLSecs):
    data_dir = "/Users/alfonso/matlab/IndirectAcquisition/keras/dataforMarius/export"
    files = [os.path.join(data_dir, file_i) for file_i in os.listdir(data_dir) if file_i.endswith('.mat')]

    matlabStruct = umatlab.loadmat(files[1]).get('data')
    energyBand = matlabStruct.get('residualEnergyBand')
    energyBand = (energyBand / 120) + 1  # map dB energies in [-120, 0] to [0, 1]
    totalSecs = matlabStruct.get('waveIn').shape[0] / matlabStruct.get('audioSR')
    # energyBand.shape[1] / totalSecs comes out around 240 Hz (about 5 ms at 44100 Hz)
    energyBands_sr = 240
    controlNames = matlabStruct.get('controlNames')
    controlData = matlabStruct.get('controlData')
    indexVel = [i for i in range(controlNames.shape[0]) if controlNames[i] == 'abs(velocity)'][0]
    indexForce = [i for i in range(controlNames.shape[0]) if controlNames[i] == 'forceN'][0]
    velocity = controlData[indexVel, :] / 150
    force = (controlData[indexForce, :] + 0.2) / 2
    # indexString = [i for i in range(controlNames.shape[0]) if controlNames[i] == 'string'][0]
    # string = controlData[indexString, :]
    # pitch = controlData[6, :] / 1500

    # We want winLSecs seconds of audio in our window
    #winLSecs = 0.05
    windowSize = int((winLSecs * energyBands_sr) // 2 * 2)  # round down to an even frame count
    # And we'll move our window by windowSize/2
    hopSize = windowSize // 2
    n_hops = (energyBand.shape[1]) // hopSize
    print('windowSize', windowSize)



    # ------------- prepare dataset
    Xs = []
    ys = []

    # Let's start with the music files
    for filename in files:
        # print(filename)
        matlabStruct = umatlab.loadmat(filename).get('data')
        energyBand = (matlabStruct.get('energyBand') / 120) + 1
        # energyBand=(matlabStruct.get('residualEnergyBand')/120)+1
        controlData = matlabStruct.get('controlData')
        controlNames = matlabStruct.get('controlNames')
        target = controlData[indexVel, :] / 150
        # target=(controlData[indexForce,:]+0.2)/2

        n_hops = (energyBand.shape[1]) // hopSize

        # Drop the last hop so every window fits inside the recording
        n_hops = int(n_hops) - 1
        for hop_i in range(n_hops):
            # Creating our sliding window
            frames = energyBand[:, (hop_i * hopSize):(hop_i * hopSize + windowSize)]
            Xs.append(frames[..., np.newaxis])
            # And then store the vel
            ys.append(target[(hop_i * hopSize):(hop_i * hopSize + windowSize)])

    Xs = np.array(Xs)
    ys = np.array(ys)
    print(Xs.shape, ys.shape)

    ds = datasets.Dataset(Xs=Xs, ys=ys, split=[0.8, 0.1, 0.1], n_classes=0)

    #---------- create ConvNet
    tf.reset_default_graph()

    # Create the input to the network: a 4-dimensional tensor (batch_size, height (freq bands), width (time frames), channels).
    # Recall that we are using sliding windows of our magnitudes (TODO):
    X = tf.placeholder(name='X', shape=(None, Xs.shape[1], Xs.shape[2], Xs.shape[3]), dtype=tf.float32)

    # Create the output of the network: a windowSize-long vector of velocity values (a regression target, not a one-hot encoding).
    Y = tf.placeholder(name='Y', shape=(None, windowSize), dtype=tf.float32)

    # TODO:  Explore different numbers of layers, and sizes of the network
    n_filters = [9, 9, 9]

    # Now let's loop over our n_filters and create the deep convolutional neural network
    H = X
    for layer_i, n_filters_i in enumerate(n_filters):
        # Let's use the helper function to create our connection to the next layer:
        # TODO: explore changing the parameters here:
        H, W = utils.conv2d(
            H, n_filters_i, k_h=2, k_w=2, d_h=2, d_w=2,
            name=str(layer_i))

        # And use a nonlinearity
        # TODO: explore changing the activation here:
        # H = tf.nn.relu(H)
        H = tf.nn.softplus(H)
        # H 4D tensor [batch, height, width, channels]
        #    H=tf.nn.max_pool(value=H, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1), padding='SAME', data_format='NHWC', name=None)

        # Just to check what's happening:
        print(H.get_shape().as_list())

    # Connect the last convolutional layer to a fully connected network
    fc1, W = utils.linear(H, n_output=100, name="fcn1", activation=tf.nn.relu)
    # fc2, W = utils.linear(fc, n_output=50, name="fcn2", activation=tf.nn.relu)
    # fc3, W = utils.linear(fc2, n_output=10, name="fcn3", activation=tf.nn.relu)


    # And another fully connected layer, now with windowSize outputs: one velocity value per frame in the window
    Y_pred, W = utils.linear(fc1, n_output=windowSize, name="pred", activation=tf.nn.sigmoid)

    loss = tf.squared_difference(Y_pred, Y)
    cost = tf.reduce_mean(tf.reduce_sum(loss, 1))
    learning_rate = 0.001
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    # predicted_y = tf.argmax(Y_pred,1)
    # actual_y = tf.argmax(Y,1)
    # correct_prediction = tf.equal(predicted_y, actual_y)
    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))



    #-----TRAIN ConvNet
    # Explore these parameters: (TODO)
    batch_size = 400

    # Create a session and init!
    sess = tf.Session()
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())  # initialize_all_variables() is deprecated

    # Now iterate over our dataset n_epoch times
    n_epochs = 100
    for epoch_i in range(n_epochs):
        print('Epoch: ', epoch_i)

        # Train
        this_cost = 0
        its = 0

        # Do our mini batches:
        for Xs_i, ys_i in ds.train.next_batch(batch_size):
            # Note here: we are running the optimizer so
            # that the network parameters train!
            this_cost += sess.run([cost, optimizer], feed_dict={
                X: Xs_i, Y: ys_i})[0]
            its += 1
            # print(this_cost / its)
        print('Training cost: ', this_cost / its)

        # Validation (see how the network does on unseen data).
        this_cost = 0
        its = 0

        # Do our mini batches:
        for Xs_i, ys_i in ds.valid.next_batch(batch_size):
            # Note here: we are NOT running the optimizer!
            # we only measure the accuracy!
            this_cost += sess.run(cost, feed_dict={
                X: Xs_i, Y: ys_i})  # , keep_prob: 1.0
            its += 1
        print('Validation cost: ', this_cost / its)

    # #-----plot convolutional Kernels learned
    # g = tf.get_default_graph()
    # for layer_i in range(len(n_filters)):
    #     W = sess.run(g.get_tensor_by_name('{}/W:0'.format(layer_i)))
    #     plt.figure(figsize=(5, 5))
    #     plt.imshow(utils.montage_filters(W))
    #     plt.title('Layer {}\'s Learned Convolution Kernels'.format(layer_i))

    modelFileName = ('./models/velocity_wL' + str(winLSecs) + '_' +
                     datetime.datetime.now().strftime("%Y%m%d_%H%M") + '.chkp')
    saver.save(sess, modelFileName)
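

# A minimal entry point (winLSecs=0.05 is the value suggested in the
# comments above; any window length can be passed):
if __name__ == '__main__':
    main(winLSecs=0.05)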
Example #6
import numpy as np

# `import_csv` and `datasets` are helpers defined elsewhere in the project.

data_file = "/Users/luke/ownCloud/deep_learning/course/final_project/fer2013.csv"
labels, images = import_csv(data_file)
assert (len(labels) == len(images))

# Parse each image's space-separated pixel string into a uint8 vector
imgs = []
for image in images:
    imgs.append(np.fromstring(str(image), dtype=np.uint8, sep=' '))
Xs = np.array(imgs).astype(np.uint8)
ys = np.array(labels).astype(np.uint8)
#print(ys)
assert (len(Xs) == len(ys))

ds = datasets.Dataset(Xs, ys, one_hot=True, split=[0.8, 0.1, 0.1])

for i in range(10):
    print(ds.X[i].shape)

import tensorflow as tf

tf.reset_default_graph()

# We'll have placeholders just like before which we'll fill in later.
n_input = 48 * 48
n_output = 7
ds_X_reshape = np.reshape(ds.X, (-1, 48, 48, 1))  # each FER2013 image is 48x48, single channel
ds_valid_images_reshape = np.reshape(ds.valid.images,
                                     (ds.valid.images.shape[0], 48, 48, 1))
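
# The excerpt stops before the placeholders it announces; a sketch of how
# they might look for 48 x 48 grayscale inputs and n_output=7 classes
# (the names X and Y are assumptions):
X = tf.placeholder(tf.float32, shape=[None, 48, 48, 1], name='X')
Y = tf.placeholder(tf.float32, shape=[None, n_output], name='Y')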
Example #7
import numpy as np
import tensorflow as tf

# `datasets`: course dataset helpers. This excerpt begins mid-loop; the
# `ys.append(1)` below labels the second class of audio windows.
        ys.append(1)

# Convert them to an array:
Xs = np.array(Xs)
ys = np.array(ys)

print(Xs.shape, ys.shape)

# Just to make sure you've done it right.  If you've changed any of the
# parameters of the dft/hop size, then this will fail.  If that's what you
# wanted to do, then don't worry about this assertion.
assert (Xs.shape == (15360, 43, 256, 1) and ys.shape == (15360, ))

n_observations, n_height, n_width, n_channels = Xs.shape

ds = datasets.Dataset(Xs=Xs, ys=ys, split=[0.8, 0.1, 0.1], one_hot=True)

Xs_i, ys_i = next(ds.train.next_batch())  # default batch size is 100

# Notice the shape this returns.  This will become the shape of our input and output of the network:
print(Xs_i.shape, ys_i.shape)

assert (ys_i.shape == (100, 2))

tf.reset_default_graph()

# Create the input to the network.  This is a 4-dimensional tensor!
# Don't forget that we should use None as a shape for the first dimension
# Recall that we are using sliding windows of our magnitudes (TODO):
with tf.device('/gpu:0'):
    X = tf.placeholder(name='X', shape=[None, 43, 256, 1], dtype=tf.float32)
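
    # The label placeholder is cut off in this excerpt; given the assert
    # above that ys_i.shape == (100, 2), it is plausibly a 2-class one-hot
    # tensor (a sketch continuing the same device block):
    Y = tf.placeholder(name='Y', shape=[None, 2], dtype=tf.float32)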