Example #1
0
class BarkKmeans(ff.BaseModel):
    """
    Declarative feature-learning graph: shuffle a large sample of input
    frames, give each example unit norm, and learn 128 k-means centroids.
    Only the shuffled samples and the assembled pipeline are persisted.
    """
    # Root node that iterates over the training documents; transient.
    docs = ff.Feature(
        ff.IteratorNode,
        store=False)

    # Draw one million samples in randomized order from the document
    # stream; stored so later runs can reuse the same draw.
    shuffle = ff.NumpyFeature(
        zounds.ShuffledSamples,
        nsamples=int(1e6),
        needs=docs,
        store=True)

    # Give each example unit norm before clustering.
    # NOTE(review): presumably so clustering sees shape rather than
    # magnitude, as the other pipelines here state — confirm.
    unitnorm = ff.PickleFeature(
        zounds.UnitNorm,
        needs=shuffle,
        store=False)

    # Learn 128 centroids from the normalized examples; transient —
    # it is captured by the pipeline below.
    kmeans = ff.PickleFeature(
        zounds.KMeans,
        centroids=128,
        needs=unitnorm,
        store=False)

    # Assemble the preprocessing steps into a single re-usable, stored
    # pipeline.
    pipeline = ff.PickleFeature(
        zounds.PreprocessingPipeline,
        needs=(unitnorm, kmeans),
        store=True)
Example #2
0
class FreqAdaptiveAutoEncoder(ff.BaseModel):
    """
    Define a processing pipeline to learn a compressed representation of the
    Sound.freq_adaptive feature.  Once this is trained and the pipeline is
    stored, we can apply all the pre-processing steps and the autoencoder
    forward and in reverse.
    """
    # Root node that iterates over the training documents.
    docs = ff.Feature(ff.IteratorNode)

    # Randomize example order, keeping 500k float32 samples.
    shuffle = ff.PickleFeature(zounds.ShuffledSamples,
                               nsamples=500000,
                               dtype=np.float32,
                               needs=docs)

    # Mu-law companding of the samples.
    # NOTE(review): presumably compresses dynamic range before scaling —
    # confirm against zounds.MuLawCompressed.
    mu_law = ff.PickleFeature(zounds.MuLawCompressed, needs=shuffle)

    # Per-instance scaling of each compressed example.
    scaled = ff.PickleFeature(zounds.InstanceScaling, needs=mu_law)

    # Train the AutoEncoder defined elsewhere in this module with MSE loss
    # and Adam; half the data is held out for validation
    # (holdout_percent=0.5).
    autoencoder = ff.PickleFeature(
        zounds.PyTorchAutoEncoder,
        trainer=zounds.SupervisedTrainer(
            AutoEncoder(),
            loss=nn.MSELoss(),
            optimizer=lambda model: optim.Adam(model.parameters(), lr=0.00005),
            epochs=100,
            batch_size=64,
            holdout_percent=0.5),
        needs=scaled)

    # assemble the previous steps into a re-usable pipeline, which can perform
    # forward and backward transformations
    pipeline = ff.PickleFeature(zounds.PreprocessingPipeline,
                                needs=(mu_law, scaled, autoencoder),
                                store=True)
Example #3
0
    class InfiniteLearningPipeline(cls):
        """
        Specialization of the wrapped model class ``cls`` whose dataset is
        an endless sampler and whose stored pipeline is rebuilt from
        ``cls.features`` on each run.
        """
        # Endless sampler; every setting is deferred via ff.Var so the
        # concrete values (sample count, dtype, filter, parallelism) are
        # supplied at instantiation time.
        dataset = ff.Feature(
            InfiniteSampler,
            nsamples=ff.Var('nsamples'),
            dtype=ff.Var('dtype'),
            feature_filter=ff.Var('feature_filter'),
            parallel=ff.Var('parallel'))

        # Persisted pipeline assembled from the parent class's features.
        # NOTE(review): "Clobber" presumably means the stored artifact is
        # overwritten on retrain — confirm against ClobberPickleFeature.
        pipeline = ff.ClobberPickleFeature(
            PreprocessingPipeline,
            needs=cls.features,
            store=True)

        @classmethod
        def load_network(cls):
            """
            Return the ``network`` attribute of the first pipeline step
            that exposes one.

            Raises:
                RuntimeError: if no trained model has been stored yet, or
                    if no step in the stored pipeline has a ``network``
                    attribute.
            """
            if not cls.exists():
                raise RuntimeError('No network has been trained or saved')

            instance = cls()
            # Scan the pipeline steps in order; steps without a network
            # raise AttributeError and are skipped.
            for p in instance.pipeline:
                try:
                    return p.network
                except AttributeError:
                    pass

            raise RuntimeError('There is no network in the pipeline')
Example #4
0
class Corpus(ff.BaseModel):
    """
    Define the processing graph needed to extract corpus-level features,
    whether, and how those features should be persisted.
    """
    # Lazily stream each document's per-document word counts out of the
    # document class; transient — only the aggregate below is persisted.
    docs = ff.Feature(
        lambda doc_cls: (doc.counts for doc in doc_cls),
        store=False)

    # Corpus-wide word-count totals aggregated over every document,
    # persisted as JSON.
    total_counts = ff.JSONFeature(
        WordCount,
        needs=docs,
        store=True)
Example #5
0
class DctKmeansWithLogAmplitude(ff.BaseModel):
    """
    A pipeline that applies a logarithmic weighting to the magnitudes of the
    spectrum before learning centroids.
    """
    # Root node that iterates over the training documents; transient.
    docs = ff.Feature(
            ff.IteratorNode,
            store=False)

    # randomize the order of the data
    # FIX: nsamples is a sample count, so pass an int — 1e6 is a float,
    # and the BarkKmeans pipeline in this codebase already uses int(1e6).
    shuffle = ff.NumpyFeature(
            zounds.ReservoirSampler,
            nsamples=int(1e6),
            needs=docs,
            store=True)

    # Apply a logarithmic weighting to the shuffled examples before
    # normalization.
    log = ff.PickleFeature(
            zounds.Log,
            needs=shuffle,
            store=False)

    # give each frame unit norm, since we care about the shape of the spectrum
    # and not its magnitude
    unit_norm = ff.PickleFeature(
            zounds.UnitNorm,
            needs=log,
            store=False)

    # learn 512 centroids, or basis functions
    kmeans = ff.PickleFeature(
            zounds.KMeans,
            centroids=512,
            needs=unit_norm,
            store=False)

    # assemble the previous steps into a re-usable pipeline, which can perform
    # forward and backward transformations
    pipeline = ff.PickleFeature(
            zounds.PreprocessingPipeline,
            needs=(log, unit_norm, kmeans),
            store=True)
Example #6
0
    class Rbm(featureflow.BaseModel, Settings):
        """
        Small learning pipeline: sample 1000 frames, unit-norm and
        mean/std-normalize them, then cluster into 3 centroids.

        NOTE(review): the class and final feature are named ``Rbm`` but the
        processor wired in is ``KMeans`` — looks like a leftover name from
        an earlier RBM-based version; confirm intent.
        """
        # Root iterator over the input data; never persisted.
        iterator = featureflow.Feature(Iterator, store=False)

        # Reservoir-sample 1000 examples in randomized order; persisted.
        shuffle = featureflow.NumpyFeature(ReservoirSampler,
                                           nsamples=1000,
                                           needs=iterator,
                                           store=True)

        # Give each example unit norm; transient (captured by the pipeline).
        unitnorm = featureflow.PickleFeature(UnitNorm,
                                             needs=shuffle,
                                             store=False)

        # Standardize (zero mean, unit variance) after unit-norming.
        meanstd = featureflow.PickleFeature(MeanStdNormalization,
                                            needs=unitnorm,
                                            store=False)

        # Cluster the normalized examples into 3 centroids.
        rbm = featureflow.PickleFeature(KMeans,
                                        centroids=3,
                                        needs=meanstd,
                                        store=False)

        # Assemble the normalization and clustering steps into a single
        # stored, re-usable pipeline.
        pipeline = featureflow.PickleFeature(PreprocessingPipeline,
                                             needs=(unitnorm, meanstd, rbm),
                                             store=True)
Example #7
0
class Document(ff.BaseModel):
    """
    Define the processing graph needed to extract document-level features,
    whether, and how those features should be persisted.
    """
    # Raw bytes of the document, read in 128-byte chunks; persisted.
    raw = ff.ByteStreamFeature(
        ff.ByteStream,
        chunksize=128,
        store=True)

    # Checksum computed over the raw byte stream, persisted as JSON.
    checksum = ff.JSONFeature(
        CheckSum,
        needs=raw,
        store=True)

    # Token stream derived from the raw bytes; transient — recomputed
    # on demand from the stored raw feature.
    tokens = ff.Feature(
        Tokenizer,
        needs=raw,
        store=False)

    # Per-document word counts over the token stream, persisted as JSON.
    counts = ff.JSONFeature(
        WordCount,
        needs=tokens,
        store=True)
Example #8
0
class DctKmeans(ff.BaseModel):
    """
    A pipeline that does example-wise normalization by giving each example
    unit-norm, and learns 512 centroids from those examples.
    """
    # Root node that iterates over the training documents; transient.
    docs = ff.Feature(
            ff.IteratorNode,
            store=False)

    # randomize the order of the data
    # FIX: nsamples is a sample count, so pass an int — 1e6 is a float,
    # and the BarkKmeans pipeline in this codebase already uses int(1e6).
    shuffle = ff.NumpyFeature(
            zounds.ReservoirSampler,
            nsamples=int(1e6),
            needs=docs,
            store=True)

    # give each frame unit norm, since we care about the shape of the spectrum
    # and not its magnitude
    unit_norm = ff.PickleFeature(
            zounds.UnitNorm,
            needs=shuffle,
            store=False)

    # learn 512 centroids, or basis functions
    kmeans = ff.PickleFeature(
            zounds.KMeans,
            centroids=512,
            needs=unit_norm,
            store=False)

    # assemble the previous steps into a re-usable pipeline, which can perform
    # forward and backward transformations
    pipeline = ff.PickleFeature(
            zounds.PreprocessingPipeline,
            needs=(unit_norm, kmeans),
            store=True)