Example #1
0
    def test_can_apply_sliding_window_to_time_frequency_representation(self):
        """Window a (time, frequency) array and check the resulting units.

        A 200 x 100 array of zeros with a one-second time dimension is run
        through a stored ``SlidingWindow`` feature configured with
        ``SampleRate(Seconds(2), Seconds(2))``. The stored result must be an
        ``ArrayWithUnits`` of shape (100, 2, 100) whose dimensions are
        (time @ 2s, time @ 1s, frequency).
        """
        frequency_dim = FrequencyDimension(
            LinearScale(FrequencyBand(0, 22000), 100))
        spectrogram = ArrayWithUnits(
            np.zeros((200, 100)),
            [TimeDimension(Seconds(1)), frequency_dim])
        window_scheme = SampleRate(Seconds(2), Seconds(2))

        @simple_in_memory_settings
        class Document(BaseModel):
            windowed = ArrayWithUnitsFeature(
                SlidingWindow, wscheme=window_scheme, store=True)

        doc_id = Document.process(windowed=spectrogram)
        windowed = Document(doc_id).windowed

        # Overall container type, shape and rank.
        self.assertIsInstance(windowed, ArrayWithUnits)
        self.assertEqual((100, 2, 100), windowed.shape)
        self.assertEqual(3, len(windowed.dimensions))

        # Axis 0: one entry per window.
        self.assertIsInstance(windowed.dimensions[0], TimeDimension)
        self.assertEqual(Seconds(2), windowed.dimensions[0].frequency)

        # Axis 1: the original one-second frames inside each window.
        self.assertIsInstance(windowed.dimensions[1], TimeDimension)
        self.assertEqual(Seconds(1), windowed.dimensions[1].frequency)

        # Axis 2: the frequency axis is carried through.
        self.assertIsInstance(windowed.dimensions[2], FrequencyDimension)
Example #2
0
    def test_can_round_trip_3d_constant_rate_time_series_with_frequency_dim(
            self):
        """Round-trip a (time, time, frequency) array via ``self._roundtrip``.

        The decoded value must be an ``ArrayWithUnits`` with three
        dimensions whose time frequencies/durations, frequency scale and raw
        values all match what was encoded.
        """
        linear_scale = LinearScale(FrequencyBand(20, 20000), 100)
        source_dims = (
            TimeDimension(Seconds(2), Milliseconds(1000)),
            TimeDimension(Seconds(1), Milliseconds(500)),
            FrequencyDimension(linear_scale),
        )
        values = np.random.random_sample((5, 2, 100))

        restored = self._roundtrip(ArrayWithUnits(values, source_dims))
        self.assertIsInstance(restored, ArrayWithUnits)
        self.assertEqual(3, len(restored.dimensions))

        # The two leading axes are time dimensions; check each in turn.
        expected_time_axes = (
            (Seconds(2), Milliseconds(1000)),
            (Seconds(1), Milliseconds(500)),
        )
        for axis, (frequency, duration) in enumerate(expected_time_axes):
            time_dim = restored.dimensions[axis]
            self.assertIsInstance(time_dim, TimeDimension)
            self.assertEqual(frequency, time_dim.frequency)
            self.assertEqual(duration, time_dim.duration)

        # The trailing axis keeps its frequency scale.
        frequency_dim = restored.dimensions[2]
        self.assertIsInstance(frequency_dim, FrequencyDimension)
        self.assertEqual(linear_scale, frequency_dim.scale)

        # Finally, the numeric payload itself must be unchanged.
        np.testing.assert_allclose(restored, values)
 def test_can_round_trip_specific_scale_type(self):
     """Encode then decode a ``FrequencyDimension`` and compare scales.

     The dimension is passed through ``self.encoder`` and ``self.decoder``;
     the decoded scale must be a ``LinearScale`` equal to the original.
     """
     original_scale = LinearScale(FrequencyBand(20, 20000), 50)
     payload = self.encoder.encode(FrequencyDimension(original_scale))
     restored = self.decoder.decode(payload)
     self.assertIsInstance(restored.scale, LinearScale)
     self.assertEqual(original_scale, restored.scale)
Example #4
0
 def test_can_round_trip_mixed_dimensions(self):
     """Round-trip a list holding one dimension of each kind.

     An identity, a time and a frequency dimension are sent through
     ``self.roundtrip`` and must come back as an equal sequence.
     """
     frequency_dim = FrequencyDimension(
         LinearScale(FrequencyBand(100, 1000), 10))
     dimensions = [
         IdentityDimension(),
         TimeDimension(Seconds(1), Milliseconds(500)),
         frequency_dim,
     ]
     self.assertSequenceEqual(dimensions, self.roundtrip(dimensions))
 def test_can_round_trip_linear_scale(self):
     """Round-trip a ``LinearScale`` through ``LinearScaleEncoderDecoder``.

     The codec must report that it can encode the scale and decode its own
     output, and decoding must yield a scale equal to the original.
     """
     original = LinearScale(FrequencyBand(20, 4000), n_bands=100)
     codec = LinearScaleEncoderDecoder()

     self.assertTrue(codec.can_encode(original))
     payload = codec.encode(original)

     self.assertTrue(codec.can_decode(payload))
     self.assertEqual(original, codec.decode(payload))
Example #6
0
    def test_maintains_array_with_units_dimensions(self):
        """Check dimension handling of an autoencoder preprocessing pipeline.

        A pipeline (shuffled samples -> unit norm -> autoencoder) is trained
        on plain random data fed in chunks. A (time, frequency)
        ``ArrayWithUnits`` is then pushed through it:

        * the forward transform must keep the time dimension and replace the
          frequency dimension with an ``IdentityDimension`` (shape (10, 2));
        * the inverse transform must restore the original shape (10, 3) and
          both original dimension types.
        """
        trainer = SupervisedTrainer(
            AutoEncoder(),
            loss=nn.MSELoss(),
            optimizer=lambda model: SGD(model.parameters(), lr=0.1),
            epochs=2,
            batch_size=64,
            checkpoint_epochs=2)

        @simple_in_memory_settings
        class Pipeline(ff.BaseModel):
            inp = ff.PickleFeature(ff.IteratorNode, store=False)

            samples = ff.PickleFeature(ShuffledSamples,
                                       nsamples=500,
                                       dtype=np.float32,
                                       needs=inp,
                                       store=False)

            unitnorm = ff.PickleFeature(UnitNorm, needs=samples, store=False)

            network = ff.PickleFeature(PyTorchAutoEncoder,
                                       trainer=trainer,
                                       needs=unitnorm,
                                       store=False)

            pipeline = ff.PickleFeature(PreprocessingPipeline,
                                        needs=(unitnorm, network),
                                        store=True)

        training = np.random.random_sample((1000, 3))

        def gen(chunksize, s):
            """Yield consecutive ``chunksize``-row slices of ``s``."""
            # ``range`` (not the Python 2-only ``xrange``) so the test also
            # runs on Python 3; only a handful of indices are produced.
            for i in range(0, len(s), chunksize):
                yield s[i:i + chunksize]

        _id = Pipeline.process(inp=gen(100, training))
        pipe = Pipeline(_id)

        test = ArrayWithUnits(np.random.random_sample(
            (10, 3)).astype(np.float32),
                              dimensions=[
                                  TimeDimension(Seconds(1)),
                                  FrequencyDimension(
                                      LinearScale(FrequencyBand(100, 1000), 3))
                              ])

        # Forward pass: time axis preserved, feature axis becomes identity.
        result = pipe.pipeline.transform(test)
        self.assertEqual((10, 2), result.data.shape)
        self.assertIsInstance(result.data, ArrayWithUnits)
        self.assertIsInstance(result.data.dimensions[0], TimeDimension)
        self.assertIsInstance(result.data.dimensions[1], IdentityDimension)

        # Backward pass: original shape and dimension types are restored.
        inverted = result.inverse_transform()
        self.assertEqual((10, 3), inverted.shape)
        self.assertIsInstance(inverted, ArrayWithUnits)
        self.assertIsInstance(inverted.dimensions[0], TimeDimension)
        self.assertIsInstance(inverted.dimensions[1], FrequencyDimension)
Example #7
0
 def test_inversion_returns_time_frequency_representation(self):
     """Invert a (time, frequency) array and check its dimensions survive.

     ``self.invert_and_assert_class`` performs the inversion; afterwards
     the time frequency/duration and the frequency scale must match the
     input.
     """
     values = np.random.random_sample((33, 30))
     linear_scale = LinearScale(FrequencyBand(20, 20000), 30)
     dimensions = [
         TimeDimension(Seconds(1), Seconds(2)),
         FrequencyDimension(linear_scale),
     ]
     restored = self.invert_and_assert_class(
         ArrayWithUnits(values, dimensions))

     self.assertEqual(Seconds(1), restored.dimensions[0].frequency)
     self.assertEqual(Seconds(2), restored.dimensions[0].duration)
     self.assertEqual(linear_scale, restored.dimensions[1].scale)
Example #8
0
 def test_has_correct_sample_rate(self):
     """Synthesize from zeroed coefficients and check the output type.

     A 100 x 2048 array spanning 0 Hz to the ``SR44100`` nyquist frequency,
     with a ``HalfLapped`` time dimension, is given to ``DCTSynthesizer``.
     The output must be ``AudioSamples`` whose samplerate is ``SR44100``.
     """
     frame_rate = HalfLapped()
     synthesizer = DCTSynthesizer()
     coefficients = np.zeros((100, 2048))

     full_band = FrequencyBand(0, SR44100().nyquist)
     linear_scale = LinearScale(full_band, coefficients.shape[1])
     dimensions = [
         TimeDimension(*frame_rate),
         FrequencyDimension(linear_scale),
     ]

     audio = synthesizer.synthesize(ArrayWithUnits(coefficients, dimensions))
     self.assertIsInstance(audio.samplerate, SR44100)
     self.assertIsInstance(audio, AudioSamples)
Example #9
0
 def test_can_invert_array_with_units(self):
     """Transform then invert a 3D array and check nothing is lost.

     The model from ``self.get_model`` is built with a 1000-10000 Hz
     ``slicex`` band and trained on zeros; a (2, 5, 100) array of ones is
     then transformed and inverted. The inverted array must have the
     original shape and the original three dimensions.
     """
     time_dim = TimeDimension(Seconds(1))
     frequency_dim = FrequencyDimension(
         LinearScale(FrequencyBand(0, 20000), 100))
     dims = [IdentityDimension(), time_dim, frequency_dim]

     fit_data = ArrayWithUnits(np.zeros((10, 5, 100)), dims)
     model_cls = self.get_model(slicex=FrequencyBand(1000, 10000))
     fitted = model_cls(model_cls.process(sliced=fit_data))

     query = ArrayWithUnits(np.ones((2, 5, 100)), dims)
     restored = fitted.pipeline.transform(query).inverse_transform()

     self.assertEqual((2, 5, 100), restored.shape)
     self.assertEqual(IdentityDimension(), restored.dimensions[0])
     self.assertEqual(time_dim, restored.dimensions[1])
     self.assertEqual(frequency_dim, restored.dimensions[2])
Example #10
0
    def test_array_with_units(self):
        """Add an ``ArrayWithUnits`` to a ``Reservoir`` and read it back.

        The value returned by ``get`` must still be an ``ArrayWithUnits``
        with 100 columns, an ``IdentityDimension`` on the first axis and a
        ``FrequencyDimension`` on the second.
        """
        reservoir = Reservoir(100)

        dims = [
            TimeDimension(frequency=Seconds(1)),
            FrequencyDimension(LinearScale(FrequencyBand(100, 1000), 100)),
        ]
        reservoir.add(ArrayWithUnits(np.ones((20, 100)), dims))

        drawn = reservoir.get()
        self.assertIsInstance(drawn, ArrayWithUnits)
        self.assertEqual(100, drawn.shape[1])
        self.assertIsInstance(drawn.dimensions[0], IdentityDimension)
        self.assertIsInstance(drawn.dimensions[1], FrequencyDimension)
Example #11
0
    def test_can_sample_from_one_dimensional_feature(self):
        """Enqueue 20 frames into a 10-sample ``ReservoirSampler``.

        The sampler's internal reservoir (``_r``) must be a (10, 100)
        ``ArrayWithUnits`` whose first dimension is an ``IdentityDimension``
        and whose second is the original frequency dimension.
        """
        sampler = ReservoirSampler(nsamples=10)

        frequency_dim = FrequencyDimension(
            LinearScale(FrequencyBand(100, 1000), 100))
        frames = ArrayWithUnits(
            np.ones((20, 100)),
            [TimeDimension(frequency=Seconds(1)), frequency_dim])

        sampler._enqueue(frames, pusher=None)
        held = sampler._r

        self.assertEqual((10, 100), held.shape)
        self.assertIsInstance(held, ArrayWithUnits)
        self.assertEqual(held.dimensions[0], IdentityDimension())
        self.assertEqual(held.dimensions[1], frequency_dim)
Example #12
0
 def test_should_preserve_time_dimension_in_forward_transform(self):
     """Forward-transform a 3D array and check the leading time axis.

     ``Document`` is trained on an (identity, time, frequency) array and
     then applied to a (time, time, frequency) array of shape (11, 5, 3).
     The output must have shape (11, 15), keep the first time dimension,
     and carry an ``IdentityDimension`` on the second axis.
     """
     outer_time = TimeDimension(Seconds(1))
     inner_time = TimeDimension(Milliseconds(500))
     frequency = FrequencyDimension(LinearScale(FrequencyBand(10, 100), 3))

     fit_data = ArrayWithUnits(
         np.zeros((10, 5, 3)),
         dimensions=(IdentityDimension(), inner_time, frequency))
     fitted = Document(Document.process(l=fit_data))

     query = ArrayWithUnits(
         np.zeros((11, 5, 3)),
         dimensions=(outer_time, inner_time, frequency))
     outcome = fitted.pipeline.transform(query)

     self.assertEqual((11, 15), outcome.data.shape)
     self.assertIsInstance(outcome.data, ArrayWithUnits)
     self.assertEqual(outer_time, outcome.data.dimensions[0])
     self.assertEqual(IdentityDimension(), outcome.data.dimensions[1])
Example #13
0
    def test_sliding_window_maintains_dtype(self):
        """Check that a stored ``SlidingWindow`` feature keeps ``uint8``.

        A 200 x 100 ``uint8`` (time, frequency) array is windowed with
        ``SampleRate(Seconds(2), Seconds(2))``; the stored result must have
        the same dtype as the input.
        """
        dimensions = [
            TimeDimension(Seconds(1)),
            FrequencyDimension(LinearScale(FrequencyBand(0, 22000), 100)),
        ]
        frames = ArrayWithUnits(
            np.zeros((200, 100), dtype=np.uint8), dimensions)
        window_scheme = SampleRate(Seconds(2), Seconds(2))

        @simple_in_memory_settings
        class Document(BaseModel):
            windowed = ArrayWithUnitsFeature(
                SlidingWindow, wscheme=window_scheme, store=True)

        doc_id = Document.process(windowed=frames)
        self.assertEqual(np.uint8, Document(doc_id).windowed.dtype)
Example #14
0
    def synthesize(self, freq_adaptive_coeffs):
        """Synthesize from frequency-adaptive coefficients.

        A zero-filled coefficient array on a linear frequency scale is
        allocated (one row per entry of ``freq_adaptive_coeffs``, reusing
        its first dimension), then for every band in ``self.scale`` the
        result of ``self.band_transform`` on that band is accumulated into
        the matching slice. The filled array is handed to
        ``self.short_time_synth.synthesize`` and its result is returned.
        """
        target_scale = LinearScale.from_sample_rate(
            self.samplerate,
            self._n_linear_scale_bands(freq_adaptive_coeffs),
            always_even=self.scale_slices_always_even)

        blank = np.zeros(
            (len(freq_adaptive_coeffs), target_scale.n_bands),
            dtype=self.coeffs_dtype)
        linear_coeffs = ArrayWithUnits(
            blank,
            dimensions=[
                freq_adaptive_coeffs.dimensions[0],
                FrequencyDimension(target_scale),
            ])

        # Accumulate (rather than assign) each band's transform into the
        # slice of the linear array addressed by that band.
        for band in self.scale:
            band_coeffs = freq_adaptive_coeffs[:, band]
            linear_coeffs[:, band] += self.band_transform(
                band_coeffs, norm='ortho')

        return self.short_time_synth.synthesize(linear_coeffs)
Example #15
0
 def test_should_restore_all_dimensions_in_backward_transform(self):
     """Transform then invert a 3D array and check every dimension.

     ``Document`` is trained on an (identity, time, frequency) array and
     applied to a (time, time, frequency) array of shape (11, 5, 3). After
     ``inverse_transform`` the array must have its original shape and all
     three original dimensions.
     """
     outer_time = TimeDimension(Seconds(1))
     inner_time = TimeDimension(Milliseconds(500))
     frequency = FrequencyDimension(LinearScale(FrequencyBand(10, 100), 3))

     fit_data = ArrayWithUnits(
         np.zeros((10, 5, 3)),
         dimensions=(IdentityDimension(), inner_time, frequency))
     fitted = Document(Document.process(l=fit_data))

     query = ArrayWithUnits(
         np.zeros((11, 5, 3)),
         dimensions=(outer_time, inner_time, frequency))
     restored = fitted.pipeline.transform(query).inverse_transform()

     self.assertEqual((11, 5, 3), restored.shape)
     self.assertIsInstance(restored, ArrayWithUnits)
     self.assertEqual(outer_time, restored.dimensions[0])
     self.assertEqual(inner_time, restored.dimensions[1])
     self.assertEqual(frequency, restored.dimensions[2])
Example #16
0
    def test_can_dequeue_when_reservoir_is_partially_full(self):
        """Dequeue from a sampler that received fewer items than it holds.

        Four (10, 100) examples are enqueued into a ``ReservoirSampler``
        created with ``nsamples=10``. The dequeued array must have shape
        (4, 10, 100), an ``IdentityDimension`` on the first axis, and the
        input's own second and third dimensions.
        """
        sampler = ReservoirSampler(nsamples=10)

        dims = [
            TimeDimension(frequency=Seconds(10)),
            TimeDimension(frequency=Seconds(1)),
            FrequencyDimension(LinearScale(FrequencyBand(100, 1000), 100)),
        ]
        examples = ArrayWithUnits(np.ones((4, 10, 100)), dims)

        sampler._enqueue(examples, pusher=None)
        drained = sampler._dequeue()

        self.assertEqual((4, 10, 100), drained.shape)
        self.assertIsInstance(drained, ArrayWithUnits)
        self.assertEqual(drained.dimensions[0], IdentityDimension())
        self.assertEqual(drained.dimensions[1], examples.dimensions[1])
        self.assertEqual(drained.dimensions[2], examples.dimensions[2])
Example #17
0
    def test_forward_transform_returns_array_with_units_where_possible(self):
        """Forward-transform a (time, frequency) array and check its units.

        The model from ``self.get_model`` is trained on plain random data.
        Transforming an ``ArrayWithUnits`` must still produce an
        ``ArrayWithUnits`` with two dimensions: the time dimension, then an
        ``IdentityDimension``.
        """
        # Fit the model on unit-less random data.
        fit_data = np.random.random_sample((100, 30))
        model_cls = self.get_model()
        fitted = model_cls(model_cls.process(unitnorm=fit_data))

        # Build the time-frequency input for the forward pass.
        linear_scale = LinearScale(FrequencyBand(20, 20000), 30)
        query = ArrayWithUnits(
            np.random.random_sample((10, 30)),
            [TimeDimension(Seconds(1)), FrequencyDimension(linear_scale)])

        # Run the forward pass and inspect the transformed data.
        output = fitted.pipeline.transform(query).data

        self.assertIsInstance(output, ArrayWithUnits)
        self.assertEqual(2, len(output.dimensions))
        self.assertIsInstance(output.dimensions[0], TimeDimension)
        self.assertIsInstance(output.dimensions[1], IdentityDimension)
Example #18
0
 def decode(self, d):
     """Rebuild a ``LinearScale`` from its dictionary form.

     Reads the ``start_hz`` and ``stop_hz`` keys of ``d`` to construct the
     ``FrequencyBand``, then ``n_bands`` and ``always_even`` for the scale.
     A missing key raises ``KeyError`` from the lookup.
     """
     frequency_band = FrequencyBand(d['start_hz'], d['stop_hz'])
     n_bands = d['n_bands']
     return LinearScale(
         frequency_band, n_bands, always_even=d['always_even'])
Example #19
0
    def test_can_maintain_array_dimensions_with_supervised_learning(self):
        """Check that a supervised pipeline keeps ``ArrayWithUnits`` output.

        A network is trained to tell which side of the line through the
        origin and (1, 1) a 2D point lies on. Training data is randomly
        scaled (so the pipeline's ``UnitNorm`` step matters) and labels are
        perturbed with noise (so the ``Binarize`` step matters). A fresh,
        randomly scaled (time, frequency) ``ArrayWithUnits`` is then
        transformed; the result must be an ``ArrayWithUnits`` whose first
        dimension is still a ``TimeDimension``.
        """
        trainer = SupervisedTrainer(
            model=SupervisedNetwork(),
            loss=nn.BCELoss(),
            optimizer=lambda model: SGD(model.parameters(), lr=0.2),
            epochs=1,
            batch_size=64,
            data_preprocessor=lambda x: x.astype(np.float32),
            label_preprocessor=lambda x: x.astype(np.float32))

        @simple_in_memory_settings
        class Pipeline(ff.BaseModel):
            inp = ff.PickleFeature(ff.IteratorNode, store=False)

            samples = ff.PickleFeature(ShuffledSamples,
                                       nsamples=500,
                                       multiplexed=True,
                                       dtype=np.float32,
                                       needs=inp,
                                       store=False)

            unitnorm = ff.PickleFeature(UnitNorm,
                                        needs=samples.aspect('data'),
                                        store=False)

            hard_labels = ff.PickleFeature(Binarize,
                                           needs=samples.aspect('labels'),
                                           store=False)

            network = ff.PickleFeature(PyTorchNetwork,
                                       trainer=trainer,
                                       needs=dict(data=unitnorm,
                                                  labels=hard_labels),
                                       store=False)

            pipeline = ff.PickleFeature(PreprocessingPipeline,
                                        needs=(unitnorm, network),
                                        store=True)

        # Produce some random points on the unit circle
        samples = np.random.random_sample((1000, 2))
        samples /= np.linalg.norm(samples, axis=1, keepdims=True)

        # a line extending from the origin to (1, 1)
        origin = np.array([0, 0])
        unit = np.array([1, 1])

        # which side of the line is each sample on?
        # NOTE(review): np.cross on 2-element vectors is deprecated as of
        # NumPy 2.0; left unchanged here to keep the labels identical.
        labels = np.sign(np.cross(unit - origin, origin - samples))
        labels[labels < 0] = 0

        # scale each sample randomly, forcing the pipeline to normalize data
        factors = np.random.randint(1, 1000, (len(samples), 1))
        scaled_samples = samples * factors

        # fuzz the labels, forcing the pipeline to binarize these (i.e., force
        # them to be 0 or 1)
        fuzzed_labels = labels + np.random.normal(0, 0.1, labels.shape)
        fuzzed_labels = fuzzed_labels[..., None]

        def gen(chunksize, s, l):
            """Yield aligned ``chunksize``-row slices of data and labels."""
            # ``range`` (not the Python 2-only ``xrange``) so the test also
            # runs on Python 3.
            for i in range(0, len(s), chunksize):
                sl = slice(i, i + chunksize)
                yield dict(data=s[sl], labels=l[sl])

        _id = Pipeline.process(inp=gen(100, scaled_samples, fuzzed_labels))
        pipe = Pipeline(_id)

        # produce some new samples on the unit circle; normalize by their
        # OWN norms (the training samples already have norm 1, so dividing
        # by those would leave the new points un-normalized)
        new_samples = np.random.random_sample((1000, 2))
        new_samples /= np.linalg.norm(new_samples, axis=1, keepdims=True)

        # scale each example randomly, so the pipeline must give it unit norm
        # to arrive at the correct answer
        new_factors = np.random.randint(1, 1000, (len(new_samples), 1))
        new_scaled_samples = new_factors * new_samples

        arr = ArrayWithUnits(new_scaled_samples,
                             dimensions=[
                                 TimeDimension(Seconds(1)),
                                 FrequencyDimension(
                                     LinearScale(FrequencyBand(100, 1000), 2))
                             ])

        result = pipe.pipeline.transform(arr.astype(np.float32))
        self.assertIsInstance(result.data, ArrayWithUnits)
        self.assertIsInstance(result.data.dimensions[0], TimeDimension)