Example #1
import numpy as np

from DataGenerator import DataGenerator  # assumed import path for the generator used below


def main():
    gains = [40, 50, 60]
    tx_beams = np.arange(0, 24)
    num_samples_tot_gain_tx_beam = 10000

    # Samples are ordered gain-major: gain, then tx beam, then sample index
    # (see the batch_index arithmetic in the commented block below)

    indexes = np.arange(
        0,
        num_samples_tot_gain_tx_beam * len(tx_beams) * len(gains)
    )
    batch_size = 32
    data_path = '/media/michele/rx-12-tx-tm-0-rx-tm-1.h5'

    num_blocks_per_frame = 15
    how_many_blocks_per_frame = 1
    num_samples_per_block = 2048
    num_tx_beams = len(tx_beams)
    input_size = 1024
    
    dg = DataGenerator(
        indexes,
        batch_size,
        data_path,
        num_tx_beams,
        num_blocks_per_frame,
        input_size,
        num_samples_per_block,
        how_many_blocks_per_frame,
        shuffle=False,
        is_2d=False
    )

    batch_gain_tx_beam = num_samples_tot_gain_tx_beam // batch_size  # batches per (gain, tx beam) pair; floor division so it stays usable as a batch index


    # for [i_g, val_g] in enumerate(gains):
    #     print("Gain: " + str(val_g))
    #     for [i_t, val_t] in enumerate(tx_beams):
    #         print("Beam idx: " + str(val_t))
    #         batch_index = (i_g * len(tx_beams) * batch_gain_tx_beam) + i_t * batch_gain_tx_beam
    #         print("Batch idx: " + str(batch_index))
    #         [batch, batch_y] = dg.__getitem__(batch_index)
    #         print("tx_beam %d y % s" % (val_t, batch_y[0]))
    #         # print(batch_y[0])


    for i in range(dg.__len__()):
        print("Batch idx: " + str(i))
        [batch, batch_y] = dg.__getitem__(i)
        print("tx_beam %s %s y %s %s" % (batch[0][0], batch[-1][0], batch_y[0], batch_y[-1]))
        print("batch_x_size: %s, batch_y_size: %s" % (str(batch.shape), str(batch_y.shape)))
Example #2
import os

from DataGenerator import DataGenerator  # assumed module path


def test_data_generator():

    hdf5_file = os.path.join("..", "..", "data", "76_79_80.hdf5")
    params = {"batch_size": 32, "shuffle": True, "n_classes": 2}
    generator = DataGenerator(hdf5_file, "train", **params)
    print("Batch size: ", generator.batch_size)
    print("Data dim: ", generator.dim)
    print("Classes: ", generator.n_classes)
    print("Data number: ", generator.n_data)
    print("list_IDs length: ", len(generator.list_IDs))
    print("Training steps: ", len(generator))
    steps = len(generator)
    # mini batch not at the end of the dataset
    dataset, labels = generator.__getitem__(steps - 2)
    print("Data 1 shape: ", dataset[0].shape)
    print("Data 32 shape: ", dataset[31].shape)
    print("Label 1: ", labels[0])
    print("Label 32: ", labels[31])
    print("Data:", dataset)
    try:
        dataset[32]
    except IndexError:
        print("Dataset size is correct")
        print()
        # raise
    # the last mini batch, size usually is smaller than batch_size
    dataset, labels = generator.__getitem__(steps - 1)
    print("Batch size is:", params["batch_size"])
    print("The size of the last mini batch is:", len(dataset))
    n = len(dataset)
    print("Data 1 shape:", dataset[0].shape)
    print("Data {} (last data) shape:".format(n), dataset[n - 1].shape)
    print("Label 1:", labels[0])
    print("Label {}:".format(n), labels[n - 1])
    try:
        dataset[n]
    except IndexError:
        print("Dataset size is correct")
Example #3
import unittest

import numpy as np

# Plus the project's own helpers used below (module paths vary by repo layout):
# combine_all_wavs_and_trans_from_csvs, DataGenerator, load_audio,
# extract_mfcc_and_pad, extract_mel_spectrogram_and_pad, convert_and_pad_transcripts


class TestDataGen(unittest.TestCase):
    def setUp(self):
        _, self.df = combine_all_wavs_and_trans_from_csvs(
            "data_dir/sample_librivox-test-clean.csv")

        self.dg = DataGenerator(self.df, batch_size=10, epoch_length=10)

    def tearDown(self):
        del self.dg

    # Data generator
    def test_extract_features_and_pad(self):
        indexes = np.arange(5)
        x_data_raw, y_data_raw, sr = load_audio(self.df,
                                                indexes_in_batch=indexes)
        x_data, input_length = self.dg.extract_features_and_pad(x_data_raw, sr)

        self.assertEqual(x_data.shape, (5, 382, 26))
        self.assertEqual(len(input_length), 5)
        self.assertLessEqual(max(input_length), 382)

    def test_get_seq_size(self):
        x_data_raw, _, sr = load_audio(self.df, indexes_in_batch=[0])

        size = self.dg.get_seq_size(x_data_raw[0], sr)

        self.assertEqual(size, 256)

    def test_get_item(self):
        batch0, _ = self.dg.__getitem__(0)
        batch1, _ = self.dg.__getitem__(1)

        x_data0 = batch0.get("the_input")
        x_data1 = batch1.get("the_input")
        y_data0 = batch0.get("the_labels")
        y_data1 = batch1.get("the_labels")
        input_length = batch0.get("input_length")
        label_length = batch0.get("label_length")

        self.assertTupleEqual(x_data0.shape, (10, 494, 26))
        self.assertTupleEqual(x_data1.shape, (9, 1514, 26))
        self.assertEqual(y_data0.shape[0], 10)
        self.assertEqual(y_data1.shape[0], 9)
        self.assertEqual(input_length.shape[0], 10)
        self.assertEqual(label_length.shape[0], 10)

    # Feature generation utils
    def test_load_audio(self):
        indexes = np.arange(5)
        x_data_raw, y_data_raw, sr = load_audio(self.df,
                                                indexes_in_batch=indexes)

        self.assertEqual(len(x_data_raw), 5)
        self.assertEqual(len(y_data_raw), 5)

    def test_extract_mfcc(self):
        x_data_raw, _, sr = load_audio(self.df, indexes_in_batch=[0])

        mfcc_padded, x_length = extract_mfcc_and_pad(x_data_raw[0],
                                                     sr=sr,
                                                     max_pad_length=500,
                                                     frame_length=320,
                                                     hop_length=160,
                                                     mfcc_features=26,
                                                     n_mels=40)

        self.assertTupleEqual(mfcc_padded.shape, (500, 26))
        self.assertEqual(x_length, 256)

    def test_extract_mel_spec(self):
        x_data_raw, _, sr = load_audio(self.df, indexes_in_batch=[0])
        mel_spec, x_length = extract_mel_spectrogram_and_pad(
            x_data_raw[0],
            sr=sr,
            max_pad_length=500,
            frame_length=320,
            hop_length=160,
            n_mels=40)

        self.assertTupleEqual(mel_spec.shape, (500, 40))
        self.assertEqual(x_length, 256)

    def test_convert_transcripts(self):
        _, y_data_raw, sr = load_audio(self.df, indexes_in_batch=[0])
        transcript, y_length = convert_and_pad_transcripts(y_data_raw)
        exp = [
            23., 5., 18., 5., 0., 9., 0., 2., 21., 20., 0., 1., 12., 18., 5.,
            1., 4., 25., 0., 15., 14., 0., 20., 8., 5., 0., 3., 1., 18., 20.
        ]

        actual = transcript[0].tolist()
        self.assertListEqual(actual, exp)
        self.assertEqual(y_length, 30)
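The expected vector decodes with a space = 0, a = 1 ... z = 26 character map (an inference from the values, not a documented contract): it spells "were i but already on the cart". A hypothetical decoder:

def decode(ints):
    # Map 0 back to space and 1..26 back to a..z.
    return "".join(" " if i == 0 else chr(int(i) + ord("a") - 1) for i in ints)

exp = [23, 5, 18, 5, 0, 9, 0, 2, 21, 20, 0, 1, 12, 18, 5,
       1, 4, 25, 0, 15, 14, 0, 20, 8, 5, 0, 3, 1, 18, 20]
print(decode(exp))  # -> were i but already on the cart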
Example #4
            validation_generator.on_epoch_end()

        else:
            validation_generator = DataGenerator(
                directory="/home/helle246/data/pancreas/testing",
                shape=(512, 512),
                img_channels=1,
                lbl_channels=2,
                flat_labels=True,
                batch_size=20,
                tvl=False,
                ds='psd')
            validation_generator.on_epoch_end()

        for i in range(iterations):
            x, y = validation_generator.__getitem__(i)
            eval_results = model.evaluate(x, y)
            #print(model.metrics_names)
            #print(eval_results)

            # eval_results is ordered as model.metrics_names (see the commented
            # prints above): here index 1 = precision, 2 = recall, 3 = dice
            dices.append(eval_results[3])
            recalls.append(eval_results[2])
            precisions.append(eval_results[1])

            y_pred = model.predict(x)

            # np.save("x.npy", x)
            # np.save("pred.npy", np.greater(np.reshape(y_pred[:,:,0],(20,512,512,1)), 0.5))
            # np.save("true.npy", np.reshape(y[:,:,[0]], (20,512,512,1)))

            y = y.flatten()
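The flattened y presumably feeds a pixel-wise metric. For reference, a self-contained sketch of the Dice coefficient on flattened binary masks (the helper name is ours, not the repo's):

import numpy as np


def dice_coefficient(y_true, y_pred, eps=1e-7):
    # Dice = 2 * |intersection| / (|A| + |B|) over flattened boolean masks.
    y_true = y_true.astype(bool).ravel()
    y_pred = y_pred.astype(bool).ravel()
    inter = np.logical_and(y_true, y_pred).sum()
    return (2.0 * inter + eps) / (y_true.sum() + y_pred.sum() + eps)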
Example #5
from DataGenerator import DataGenerator

training_generator = DataGenerator('../Data/ModelNet30/train_files.txt',
                                   batch_size=32,
                                   dim=(30, 30, 30),
                                   n_classes=31,
                                   shuffle=True)

x_batch, y_batch = training_generator.__getitem__(0)  # smoke-test the first batch
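Because the generator implements the Sequence protocol, it can be passed straight to Keras. A throwaway 3D CNN sized for the (30, 30, 30) voxel grids, assuming the generator appends a trailing channel axis (both the model and that assumption are ours, not this example's):

from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Input(shape=(30, 30, 30, 1)),
    layers.Conv3D(16, 3, activation="relu"),
    layers.MaxPooling3D(2),
    layers.Flatten(),
    layers.Dense(31, activation="softmax"),  # matches n_classes=31 above
])
model.compile(optimizer="adam", loss="categorical_crossentropy")
model.fit(training_generator, epochs=10)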
Example #6
    data = DataGenerator(
        datadirs=paths,
        is_label_to_categorical=False,
        is_normalize_image_datatype=True,
        is_apply_text_preprocessing=FLAGS.is_apply_text_preprocessing,
        is_apply_sequence_preprocessing=FLAGS.is_apply_sequence_preprocessing)

    n_clusters = FLAGS.n_classes

print("n_clusters")
print(n_clusters)
from sklearn.cluster import MiniBatchKMeans

kmeans = MiniBatchKMeans(n_clusters=n_clusters, n_init=20,
                         batch_size=FLAGS.batch_size)  # plain KMeans has no batch_size/partial_fit; MiniBatchKMeans does
y_pred_kmeans = kmeans  # partial_fit returns self, so this alias accumulates state below
if True:  # ToDo: temporary toggle (False skips the incremental fit and just fetches one batch)
    for bi in range(0, data.__len__()):
        x, y = data.__getitem__(bi, True, is_return_only_x=False)
        y_pred_kmeans = y_pred_kmeans.partial_fit(x[:, :])

        ##
        print("y ", y.shape, y_pred_kmeans.labels_.shape)
        #print( metrics.acc(y, y_pred_kmeans.labels_) )
else:
    x, y = data.__getitem__(0, True, is_return_only_x=False)

# Moved outside the loop to speed things up; it is unclear whether this changes the efficiency of the algorithm
#print( metrics.acc(y, y_pred_kmeans.labels_) )  # pad entire y

#dims = [x.shape[-1], 500, 500, 2000, 10]
#dims = [x.shape[-1], 500, 500, 600, 10]
dims = [x.shape[-1], 500, 500, 600, 234]
init = VarianceScaling(scale=1. / 3., mode='fan_in', distribution='uniform')
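dims plus a VarianceScaling initializer are the usual ingredients of a DEC-style stacked autoencoder. A minimal sketch of the encoder they would parameterize (layer sizes come from dims above; the activations and tensorflow.keras imports are assumptions):

from tensorflow.keras import layers, models

inp = layers.Input(shape=(dims[0],))
h = inp
for units in dims[1:-1]:
    h = layers.Dense(units, activation="relu", kernel_initializer=init)(h)
h = layers.Dense(dims[-1], kernel_initializer=init)(h)  # linear embedding layer
encoder = models.Model(inp, h, name="encoder")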
Example #7
# Shuffle IDs
import numpy as np
from sklearn.utils import shuffle

list_IDs_shu, labels_shu = shuffle(list_IDs, labels, random_state=None)

from DataGenerator import DataGenerator

params = {
    'dim': (128, 128),
    'batch_size': len(labels),  # a single batch covering the whole dataset
    'n_classes': 91,
    'n_channels': 1,
    'shuffle': True
}
generator = DataGenerator(list_IDs_shu, labels_shu, **params)
x, y = generator.__getitem__(0)
pred = model.predict(x)
pred_en = encoder.predict(x)

pred_255 = pred * 255
x_255 = x * 255
from PIL import Image
for i in range(0, 10):
    # Replicate the single prediction channel across R, G and B.
    inp = np.zeros((128, 128, 3), dtype=np.uint8)
    inp[:, :, 0] = pred_255[i][:, :, 0]
    inp[:, :, 1] = pred_255[i][:, :, 0]
    inp[:, :, 2] = pred_255[i][:, :, 0]
    # print(np.shape(inp))
    # print(inp)
    img = Image.fromarray(inp)
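Stacking the same channel three times only produces a grayscale RGB image; PIL can build the grayscale image directly from the single channel (the file name below is illustrative):

gray = Image.fromarray(pred_255[0][:, :, 0].astype(np.uint8), mode="L")
gray.save("pred_0.png")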
Example #8
    output_feature_indecis = [0, 1]

    data_generator = DataGenerator(path_name,
                                   file_name,
                                   list_IDs=train_IDs,
                                   batch_size=batch_size,
                                   hist_len=hist_len,
                                   pred_len=pred_len,
                                   input_feature_indecis=input_feature_indecis,
                                   output_feature_indecis=output_feature_indecis,
                                   shuffle=True,
                                   return_output_data=False,
                                   return_label=True)
    data_generator.on_epoch_end()
    input_data, label = data_generator.__getitem__(0)
    print("input_data shape:", input_data.shape)

    num_input_timesteps = input_data.shape[1]
    num_input_features = input_data.shape[2]
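    # Inferred from the constructor arguments above: input_data has shape
    # (batch_size, hist_len, len(input_feature_indecis)), so axis 1 is the
    # history window and axis 2 the selected input features.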

    # setup logger
    FORMAT = '%(asctime)s %(process)d %(message)s'
    logging.basicConfig(format=FORMAT, stream=sys.stdout, level=logging.INFO)
    logger = logging.info  # alias logging.info as the log function

    model_dir = 'md_categorical_' + model_name + \
                '_neighbors' + str(neighbors) + \
                '_hist_len' + str(hist_len) + \
                '_pred_len' + str(pred_len) + \
                '_layers' + str(num_layers) + \