Example #1
0
def create_stream_reader(single_signal_file_list):
    """Build a batched, shuffled sample stream over a list of audio files.

    One ``pescador.Streamer`` around ``wav_generator`` is created per path,
    the streamers are combined with a ``pescador.ShuffledMux``, and the mux
    is handed to ``pescador.buffer_stream``.

    ``batch_size`` is not a parameter; it is read from the enclosing scope.
    """
    streamers = [
        pescador.Streamer(wav_generator, path)
        for path in single_signal_file_list
    ]
    return pescador.buffer_stream(pescador.ShuffledMux(streamers), batch_size)
Example #2
0
    def test_shuffled_mux_integer_weights(self):
        """Integer weights behave like float weights (issue #143)."""
        streamers = [pescador.Streamer(_cycle, letter) for letter in 'abc']

        def draw(weights):
            # Same streamers and seed for every mux, so the weight values'
            # type is the only thing that differs between the two runs.
            mux = pescador.ShuffledMux(
                list(streamers), weights=weights, random_state=10)
            return "".join(mux.iterate(max_iter=20))

        from_ints = draw([6, 3, 1])
        from_floats = draw([6.0, 3.0, 1.0])

        assert from_ints == from_floats
Example #3
0
    def __init__(self,
                 source_filepath,
                 seq_len=512,
                 hop=None,
                 normalize=True,
                 transform=None,
                 restart_streams=False):
        """Index the ``.wav`` files under a folder and build a pescador mux.

        Args:
            source_filepath: Root folder; it is walked recursively and every
                file whose name ends with ``.wav`` is kept.
            seq_len: Stored on the instance and forwarded to the chunk
                generators; also the fallback value for ``hop``.
            hop: Stored on the instance and forwarded to ``generate_chunk``.
                ``None`` means "use ``seq_len``".
            normalize: Stored and forwarded to the chunk generators.
            transform: Stored and forwarded to the chunk generators.
            restart_streams: If true, one ``generate_rnd_chunk`` streamer per
                track is combined with a ``ShuffledMux``; otherwise one
                ``generate_chunk`` streamer per track is combined with a
                ``StochasticMux`` in ``'exhaustive'`` mode.
        """
        # `super(MusicDataset).__init__()` only built an unbound super object
        # and never ran the parent initialiser; the zero-argument form does.
        super().__init__()
        source_folder = Path(source_filepath)

        self.seq_len = seq_len
        self.hop = seq_len if hop is None else hop
        self.normalize = normalize
        self.transform = transform

        # Collect every .wav file below the folder; the suffix filter also
        # drops stray non-audio files such as .DS_Store.
        songs = [
            os.path.join(root, name)
            for root, _dirs, files in os.walk(source_folder)
            for name in files
            if name.endswith('.wav')
        ]

        # Record each track's path and its length as reported by torchaudio
        # (signal length divided by channel count).
        data = []
        for song in songs:
            signal_info = torchaudio.info(song)[0]
            data.append({
                "path": song,
                "len": int(signal_info.length / signal_info.channels)
            })
        self.data = data

        # Mux one streamer per track.
        if restart_streams:
            streams = [
                pescador.Streamer(generate_rnd_chunk, track['path'],
                                  track['len'], seq_len, normalize, transform)
                for track in data
            ]
            self.mux = pescador.ShuffledMux(streams)
        else:
            streams = [
                pescador.Streamer(generate_chunk, track['path'], track['len'],
                                  seq_len, self.hop, normalize, transform)
                for track in data
            ]
            # All streamers are active at once (n_active == len(streams)).
            self.mux = pescador.StochasticMux(streams,
                                              len(streams),
                                              rate=None,
                                              mode='exhaustive')
Example #4
0
def create_batch_generator(audio_filepath_list, batch_size):
    """Return a stream of batches sampled at random across audio files.

    Each path gets its own ``pescador.Streamer`` around
    ``audio_sample_generator``; the streamers are combined with a
    ``pescador.ShuffledMux`` and passed to ``pescador.buffer_stream``
    together with ``batch_size``.
    """
    per_file_streams = [
        pescador.Streamer(audio_sample_generator, path)
        for path in audio_filepath_list
    ]
    shuffled = pescador.ShuffledMux(per_file_streams)
    return pescador.buffer_stream(shuffled, batch_size)
def create_batch_generator(audio_filepath_list, batch_size):
    """Return a stream of labelled batches sampled across audio files.

    The class name is the part of the file's base name before the first
    ``'-'``; it is looked up in ``class_labels`` and the label is ``None``
    when the class name is not present. Each ``[path, label]`` pair is passed
    as a single list argument to ``file_sample_generator`` through a
    ``pescador.Streamer``. The streamers are combined with a
    ``pescador.ShuffledMux`` and passed to ``pescador.buffer_stream``.
    """

    def make_streamer(path):
        prefix = os.path.basename(path).split('-')[0]
        label = class_labels[prefix] if prefix in class_labels else None
        return pescador.Streamer(file_sample_generator, [path, label])

    per_file_streams = [make_streamer(path) for path in audio_filepath_list]
    shuffled = pescador.ShuffledMux(per_file_streams)
    return pescador.buffer_stream(shuffled, batch_size)
Example #6
0
    def test_shuffled_mux_simple(self):
        """`ShuffledMux` should draw from every stream it is given."""
        letters = ['a', 'b', 'c', 'd', 'e']
        mux = pescador.ShuffledMux(
            [pescador.Streamer(_cycle, letter) for letter in letters],
            random_state=10)

        drawn = list(mux.iterate(max_iter=1000))
        counts = collections.Counter(drawn)

        # Every input letter must show up among the samples.
        assert set(counts) == set(letters)

        # The empirical frequency of each letter should agree with the
        # mux's weight for that stream to one significant digit.
        for idx, letter in enumerate(letters):
            observed = counts[letter] / len(drawn)
            np.testing.assert_approx_equal(
                observed, mux.weights[idx], significant=1)
Example #7
0
    def batch_generator(self, audio_path_list, audio_label_list, batch_size):
        """
        Build a stream of batches to feed the algorithm (NN).

        Every (path, label) pair is wrapped in a ``pescador.Streamer`` around
        ``self.sample_generator``; the streamers are combined with a
        ``pescador.ShuffledMux`` and buffered with ``pescador.buffer_stream``.

        :param audio_path_list: list of all paths of the audio dataset
        :param audio_label_list: labels, paired position-wise with the paths
        :param batch_size: size (length) of a batch
        :return: the buffered stream produced by ``pescador.buffer_stream``
        """
        # TODO: Make it closure form to get some speed
        sample_streams = [
            pescador.Streamer(self.sample_generator, path, label)
            for path, label in zip(audio_path_list, audio_label_list)
        ]
        shuffled = pescador.ShuffledMux(sample_streams)
        return pescador.buffer_stream(shuffled, batch_size)
Example #8
0
    def test_shuffled_mux_weights(self):
        """Weighted sampling should reproduce the requested proportions."""
        letters = ['a', 'b', 'c']
        weights = [.6, .3, .1]

        streamers = [pescador.Streamer(_cycle, letter) for letter in letters]
        mux = pescador.ShuffledMux(streamers, weights=weights, random_state=10)

        drawn = list(mux.iterate(max_iter=1000))
        counts = collections.Counter(drawn)

        # All three letters must be present among the samples.
        assert set(counts) == {'a', 'b', 'c'}

        # Each letter's share of the samples should agree with its weight
        # to one significant digit.
        for letter, expected in zip(letters, weights):
            observed = counts[letter] / len(drawn)
            np.testing.assert_approx_equal(
                observed, expected, significant=1)
Example #9
0
# Population 1: 3 streamers are active at a time (second argument, by
# analogy with mux2 below). Each streamer will generate, on average,
# 5 samples before being replaced (third argument).
mux1 = pescador.StochasticMux(pop1, 3, 5)

# Population 2: 5 active streamers, each replaced after 2 examples
# on average.
mux2 = pescador.StochasticMux(pop2, 5, 2)

####################
# Mux composition
####################
# We multiplex the two populations using a ShuffledMux.
# The ShuffledMux keeps all of its input streamers active,
# and draws samples independently at random from each one.

# This should generate an approximately equal number of upper- and
# lower-case letters, with more diversity among the lower-case letters.
# NOTE(review): presumably pop1 yields upper-case and pop2 lower-case
# letters; both populations are defined outside this excerpt.
hier_mux = pescador.ShuffledMux([mux1, mux2])
# Draw 80 samples and print them as a single string.
print(''.join(hier_mux(max_iter=80)))

#####################
# Weighted sampling
#####################
# If you want to specify the sampling probability of mux1 and mux2,
# you can supply weights to the ShuffledMux.
# By default, each input is equally likely.

# With weights 0.75 / 0.25, mux1 is three times as likely to be chosen as
# mux2, so this should generate three times as many upper-case as
# lower-case letters.
weight_mux = pescador.ShuffledMux([mux1, mux2], weights=[0.75, 0.25])
print(''.join(weight_mux(max_iter=80)))
Example #10
0
    def train(self, train_dir, kk=0, folds=10000, grid=False, grid_file=None):
        """Train the network on the files found in ``train_dir``.

        Samples are drawn from a seeded ``pescador.ShuffledMux`` over one
        ``indexes_gen`` streamer per file and collected into a fixed-size
        batch buffer. When ``grid`` is set, every 10th epoch the network is
        evaluated on the ``mode="test"`` streamers of the same files and one
        tab-separated line (epoch, per-character MRR, macro MRR, AUROC) is
        appended to ``grid_file``. The model is saved to ``p["model_path"]``
        only when ``grid`` is not set.

        Args:
            train_dir: Directory whose entries are each passed to
                ``indexes_gen``.
            kk: Forwarded to ``indexes_gen`` as ``k``.
            folds: Forwarded to ``indexes_gen`` as ``folds``.
            grid: Run the periodic evaluation instead of saving the model.
            grid_file: Writable text file that receives the metric lines;
                required when ``grid`` is true.

        Raises:
            ValueError: If ``grid`` is true but ``grid_file`` is ``None``.
        """
        # Fail before training starts rather than at the first evaluation.
        if grid and grid_file is None:
            raise ValueError("grid_file must be provided when grid is True")

        p = self.p
        batch_size = p["batch_size"]
        num_epochs = p["num_epochs"]
        flat_len = p["utter_len"] * p["char_len"]

        net = self.build()
        net.to(device)
        net.train()

        # Loss and Optimizer
        criterion = nn.NLLLoss()
        optimizer = torch.optim.Adam(net.parameters(),
                                     lr=p["learning_rate"],
                                     weight_decay=p["reg_lambda"])

        # Pescador streams
        train_files = [
            os.path.join(train_dir, f) for f in os.listdir(train_dir)
        ]
        streams_train = [
            pescador.Streamer(indexes_gen,
                              ff,
                              1,
                              p["utter_len"],
                              p["char_len"],
                              k=kk,
                              mode="train",
                              folds=folds) for ff in train_files
        ]
        mux_stream_train = pescador.ShuffledMux(streams_train, random_state=33)

        # Reusable batch buffers, filled one sample at a time.
        word_idxs = np.empty(shape=(batch_size, flat_len), dtype=int)
        labels = np.empty(shape=(batch_size), dtype=int)

        # Train the Model
        for epoch in range(num_epochs):
            print("Epoch " + str(epoch))
            for i, (word_idx, label, _, _) in enumerate(mux_stream_train):
                slot = i % batch_size
                np.copyto(word_idxs[slot], word_idx)
                labels[slot] = label

                # An optimisation step runs each time slot 0 is refilled
                # (except for i == 0), so the sample written at i == 0 is
                # overwritten before any step uses it.
                if slot != 0 or i == 0:
                    continue

                # Plain tensors suffice; the autograd.Variable wrapper is a
                # no-op on the PyTorch versions that provide `.to(device)`.
                answers = torch.LongTensor(labels).to(device)
                samples = torch.LongTensor(word_idxs).to(device)

                optimizer.zero_grad()
                outputs = net(samples)
                loss = criterion(outputs, answers)
                loss.backward()
                optimizer.step()

                # NOTE(review): i is a multiple of batch_size here, so this
                # only prints when i + 1 is also a multiple of 20 - confirm
                # the intended logging interval.
                if (i + 1) % 20 == 0:
                    print("Epoch [%d/%d], Batch [%d], Loss: %.4f" %
                          (epoch + 1, num_epochs, i + 1, loss.item()))
                if i // batch_size > p["max_batch_epoch"]:
                    break

            # Estimate intermediate
            if grid and (epoch + 1) % 10 == 0:
                net.eval()
                streams_test = [
                    pescador.Streamer(indexes_gen,
                                      ff,
                                      1,
                                      p["utter_len"],
                                      p["char_len"],
                                      k=kk,
                                      mode="test",
                                      folds=folds) for ff in train_files
                ]
                mux_stream_test = pescador.ChainMux(streams_test)

                # delete=False keeps the file on disk after it is closed so
                # the metric helpers can reopen it by name; the `finally`
                # below removes it even if evaluation fails.
                results_file = tempfile.NamedTemporaryFile(mode="w",
                                                           delete=False)
                try:
                    # No gradients are needed while scoring.
                    with results_file, torch.no_grad():
                        for word_idx, label, character, _ in mux_stream_test:
                            samples = torch.LongTensor(
                                word_idx.reshape((1, flat_len)))
                            samples = samples.to(device)
                            output = net(samples)
                            entry = output.cpu().data.numpy()[0]
                            # (index, exp(score)) pairs, highest score first.
                            ranked = sorted(enumerate(np.exp(entry)),
                                            key=lambda x: x[1],
                                            reverse=True)
                            results_file.write(
                                str(character[0]) + "\t" + str(label[0]) +
                                "\t" + "\t".join([str(y) for y in ranked]) +
                                "\n")

                    mrr_character = compute_MRR_per_character(
                        results_file.name)
                    macro_mrr = compute_MRR_per_prof(results_file.name, 1)
                    auroc = compute_auroc(results_file.name, 1)
                    grid_file.write(
                        str(epoch + 1) + "\t" + str(mrr_character) + "\t" +
                        str(macro_mrr) + "\t" + str(auroc[0]) + "\n")
                    grid_file.flush()
                finally:
                    os.remove(results_file.name)
                net.train()

        # Save the Model
        if not grid:
            self.save(p["model_path"])