Example #1
def main():
    encoder = Encoder()
    message = input("Type message to encode: ")
    message = message.lower()
    encoded_message, encoding_table = encoder.encode_message(message)
    print("Encoding table :")
    print(encoding_table)
    print("Encoded message :")
    print(encoded_message)
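
The Encoder class here is project-specific and not shown. A minimal sketch of an implementation that would satisfy this snippet, assuming a simple letter-to-number substitution; the internals are illustrative assumptions, not the original code:

import string

class Encoder:
    def encode_message(self, message):
        # Assumed scheme: map each lowercase letter to its 1-based alphabet
        # position; characters outside a-z pass through unchanged.
        encoding_table = {c: i + 1 for i, c in enumerate(string.ascii_lowercase)}
        encoded_message = [encoding_table.get(c, c) for c in message]
        return encoded_message, encoding_table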
Example #2
    def test_train_doc2vec_model(self):
        test_model_params = {
            "doc2vec_dm": 1,
            "doc2vec_dm_mean": 1,
            "doc2vec_epochs": 1,
            "doc2vec_hs": 0,
            "doc2vec_learning_rate_start": 0.025,
            "doc2vec_learning_rate_end": 0.01,
            "doc2vec_min_count": 2,
            "doc2vec_negative": 0,
            "doc2vec_vector_size": 1,
            "doc2vec_window": 1
        }

        # Train encoder
        encoder = Encoder(test_model_params)
        print(encoder.generate_id(test_model_params))
        docs = encoder.load_documents("resources/encoding/test_docs.line")
        encoder.set_documents(docs)
        encoder.train()

        # Check that a randomly chosen vocabulary word contains two underscores (instrument_note_duration)
        random_word = random.choice(encoder.get_word_vectors().index2word)
        self.assertEqual(random_word.count("_"), 2)

        # TODO test convert_vector_to_text

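
The random.choice assertion above only samples one vocabulary token per run. A stricter variant (an illustrative sketch, assuming every token follows the same underscore-delimited instrument_note_duration format) could validate the whole vocabulary:

        for word in encoder.get_word_vectors().index2word:
            parts = word.split("_")
            self.assertEqual(len(parts), 3)  # instrument, note, duration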
Example #3
    def outstream(self, buf):
        """ Synthesize output stream, using DBPSK modulation of carrier signals

    Chunks of synthesized signal will be put on an output queue.
    When output stream is finished, False is put on queue.

    Keyword arguments:
    buf -- an output Queue
    """

        # Send 0b00 byte, for differential encoding phase reference
        shifts = np.zeros(8, dtype=np.int8)
        (shifts, samples) = self.__encode_byte(shifts, 0b00, -1)
        buf.put(samples)

        bts = Encoder(self.payload).encode() + [0b00]
        for byteidx, byte in enumerate(bts):
            # Simulate random errors
            # if (byteidx%24) < 2:
            #   malform = random.randint(0, 8)
            #   byte = byte | 2**malform
            # if random.randint(1, 20) == 1:
            #   byte = random.randint(0, 254)
            (shifts, samples) = self.__encode_byte(shifts, byte, byteidx)
            buf.put(samples)
        buf.put(False)
        return buf
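
Because outstream terminates the stream by putting False on the queue, a consumer must treat that value as a sentinel. A minimal consumer sketch, assuming buf is a standard queue.Queue; modem and play_samples are hypothetical stand-ins for the producer instance and an audio sink:

import queue

buf = queue.Queue()
modem.outstream(buf)          # hypothetical producer; fills the queue with sample chunks
while True:
    chunk = buf.get()
    if chunk is False:        # sentinel: the stream is finished
        break
    play_samples(chunk)       # hypothetical audio sink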
Example #4
    def _get_test_encoder(self):
        """
        Trains and returns a test encoder module, prepared from test documents.
        :return: trained encoder module for testing.
        """
        encoder = Encoder(self.test_params)
        docs = encoder.load_documents("resources/encoding/test_docs.line")
        encoder.set_documents(docs)
        encoder.train()
        return encoder
Example #5
import os
from pickle import dump  # assumed to be pickle.dump, given the binary ("wb") file mode


def compress(quantized):
    dct = matrix(calcDCT())
    quality_matrix = quality(99)

    allSquares = []

    for index in range(len(quantized)):
        quantize_matrix = matrix(quantized[index])
        dct_perform = performDCT(quantize_matrix, dct)
        final_dct = quality_divide(quality_matrix, dct_perform)
        square = intoList(final_dct)
        # allSquares.append(bytearray(str(square)))
        nonzeros = nonZeros(square)
        allSquares.append(nonzeros)
        # if(len(nonzeros) > 0):
        #     allSquares += nonzeros

    print(square)  # debug output: the last processed square

    dump(allSquares, open("compressed.jv", "wb"))
    enc = Encoder("compressed.jv")
    os.remove("compressed.jv")
    enc.write("compressed.jvad")
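
The helpers used above (calcDCT, quality, performDCT, quality_divide, intoList, nonZeros) are defined elsewhere in the module. As a rough illustration of the same JPEG-style step on a single 8x8 block, a sketch using numpy and scipy; the flat quantization table is a stand-in for whatever quality(99) returns:

import numpy as np
from scipy.fftpack import dct

block = np.random.randint(0, 256, (8, 8)).astype(float) - 128  # one centered 8x8 pixel block
coeffs = dct(dct(block.T, norm='ortho').T, norm='ortho')       # 2-D DCT-II
quant_table = np.full((8, 8), 16.0)                            # stand-in quantization table
quantized_block = np.round(coeffs / quant_table).astype(int)
nonzeros = quantized_block[quantized_block != 0]               # keep only nonzero coefficients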
Example #6
def load_model_corpora(checkpoint):
    """ Load the model the checkpoint pointed at by `checkpoint' is for and the
        corpora indicated in the arguments within the checkpoint.
    """
    try:
        checkpoint = load_checkpoint(checkpoint)
        args = checkpoint['args']
        params = checkpoint['params']
    except Exception as e:
        print('The following exception occurred:')
        print(e)
        raise RuntimeError('The first object in checkpoint must be a '
                           'dictionary containing at least [args,params].')
    # Use the arguments to create a model that is the same as the one we have
    # the parameters for.
    if args.load:
        with open(args.load, 'rb') as f:
            stored_dict = pickle.load(f)
        corpora = Corpus(args.corpus,
                         load=True,
                         vocab=stored_dict['vocabulary'],
                         vectors=stored_dict['vectors'])
    else:
        # I never do load = False.
        corpora = None
    if not hasattr(args, 'old_model'):
        args.old_model = False
    if args.old_model:
        model = old_model('LSTM', len(corpora.vocab), args.encoder_size,
                          args.hidden_size, args.layers, args.dropout)
    else:
        encoder = Encoder(50, len(corpora.vocab), corpora.vectors)
        model = RNNModel(encoder.encoding_size,
                         args.hidden_size,
                         len(corpora.vocab),
                         args.layers,
                         encoder,
                         dropout=args.dropout)
    # load the parameters from checkpoint
    model.load_state_dict(params)
    return model, corpora
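
The loader expects the checkpoint to be a dictionary holding at least args and params. For reference, a sketch of the matching save side, assuming PyTorch's torch.save and that args is the argparse namespace the model was built from:

import torch

# Hypothetical save side: `model` is the trained RNNModel from above.
torch.save({'args': args, 'params': model.state_dict()}, 'checkpoint.pt')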
Example #7
    def _get_encoder(self, params):
        """
        Retrieves encoder with given parameters, either from cache (if available) or by training a new model.
        :param params: the encoder parameters.
        :return: the trained encoder model.
        """
        # TODO: refactor method

        # Check if encoder was already trained with these parameters
        encoder_id = Encoder.generate_id(params)
        self._logger.debug("Retrieving encoder model: " + str(encoder_id))

        # Check if matching encoder is in memory
        if encoder_id in self._trained_encoders:
            self._logger.debug("Loading encoder from in-memory cache: " + str(encoder_id))
            return self._trained_encoders[encoder_id]
        else:
            # Check if a matching encoder is on disk
            prev_model = None
            if self._encoder_dir is not None:
                prev_model = Encoder.load_if_exists(self._encoder_dir, encoder_id)

            if prev_model is not None:
                self._logger.debug("Loaded encoder from disk-cache: " + str(encoder_id))
                encoder = Encoder(params)
                docs = self._get_docs(encoder, params['doc2vec_docs'])
                encoder.set_documents(docs)
                encoder.set_model(prev_model)
                self._trained_encoders[encoder_id] = encoder
                return encoder
            else:
                self._logger.debug("Training new encoder model: " + str(encoder_id))
                encoder = Encoder(params)
                docs = self._get_docs(encoder, params['doc2vec_docs'])
                encoder.set_documents(docs)
                encoder.train()
                self._trained_encoders[encoder_id] = encoder
                self._logger.debug("Added encoder to cache: " + str(encoder_id))

                # Save encoder
                if self._encoder_dir is not None:
                    encoder.save(self._encoder_dir + "/" + encoder_id)
                return encoder
Example #8
from encoding import Encoder

payload = [104, 101, 108, 108, 111]  # "hello" in ASCII, 5 bytes

# encoded single frame
# [22, 22, 104, 101, 108, 108, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 210, 201, 10, 89, 213, 255]
stream = Encoder(payload).encode()
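
Judging by the comment, one encoded frame is 24 bytes: a 2-byte preamble (22, 22), a 16-byte zero-padded payload area, and 6 trailing bytes, presumably error-correction data. A sketch that slices the frame on those inferred boundaries (assumptions, not documented fields):

frame = stream[:24]
preamble, body, trailer = frame[:2], frame[2:18], frame[18:]  # inferred field boundaries
message = bytes(b for b in body if b != 0).decode("ascii")    # drop zero padding; assumes no zero bytes in the payload
print(message)  # "hello"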
Example #9
import pandas as pd
from encoding import Encoder

df = pd.DataFrame({"feature1":[1, 57, 23, 7, 8, 0, 11, 54, 0, 1],
                   "feature2": [1, 2, 3, 4, 5, 8, 7, 8, 9, 10],
                   "feature3": ['Red', 'Blue', 'Red', 'Yellow', 'Blue', 'Blue', 'Yellow', 'Red', 'Yellow', 'Red'],
                   "feature4": ['France', 'USA', 'USA', 'Canada', 'USA', 'Canada', 'France', 'Canada', 'USA', 'France'],
                   "label":['Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes']})

#categoryCol = ["feature3", "feature4"]

labelCol = ['label']
categoryCol = [x for x in df.columns.drop(labelCol) if df[x].dtype == 'object']

df_category = Encoder(data=df)
# df_encoded = df_category.GetDummies(categoryCol=categoryCol)
df_encoded = df_category.OneHotEncode(categoryCol=categoryCol)
df_encoded = df_category.LabelEncode(labelCol=labelCol)

print("Orignial Dataset: \n", df)
print("Encoded Dataset: \n", df_encoded)

Example #10
    else:
        # Load the pre-trained embeddings
        from gensim.models import KeyedVectors
        embeddings = KeyedVectors.load_word2vec_format(args.vectors_path,
                                                       binary=True)
        # Load the corpora, find the vocabulary and what is in the embeddings.
        corpora = Corpus(args.corpus, embeddings)
        # The embeddings are no longer needed; corpora keeps a copy of the
        # relevant vectors.
        del embeddings

    if args.old_model:
        model = old_model('LSTM', len(corpora.vocab), args.encoder_size,
                          args.hidden_size, args.layers, args.dropout)
    else:
        encoder = Encoder(50, len(corpora.vocab), corpora.vectors)
        model = RNNModel(encoder.encoding_size,
                         args.hidden_size,
                         len(corpora.vocab),
                         args.layers,
                         encoder,
                         dropout=args.dropout)

    criterion = torch.nn.CrossEntropyLoss()
    trainer = Trainer(model, corpora, criterion, device, logger,
                      args.batch_size, args.seq_len, args.lr,
                      args.log_interval, args.clip_grad)
    best_valid_loss = float("inf")
    for epoch in range(args.epochs):
        print('Time at the start of epoch {} is {}'.format(
            epoch, datetime.now()))
Example #11
def main():
    # Documents used to train semantic encoder model
    encoder_training_docs = "../resources/encoder_training_docs/full_1_measure_20k.txt"

    model_params = {

        # Encoder (doc2vec) settings:
        'encoder_training_docs': encoder_training_docs,
        'doc2vec_dm': 1,
        'doc2vec_dm_mean': 1,
        'doc2vec_epochs': 1,
        'doc2vec_hs': 0,
        'doc2vec_learning_rate_start': 0.025,
        'doc2vec_learning_rate_end': 0.2,
        'doc2vec_min_count': 5,
        'doc2vec_negative': 0,
        'doc2vec_vector_size': 5,
        'doc2vec_window': 1,

        # Sequence learning (Keras LSTM) settings:
        'nn_features': ['bpm', 'measure', 'beat'],
        'nn_batch_size': 100,
        'nn_dense_activation_function': "linear",
        'nn_dropout': 0.05,
        'nn_epochs': 5,
        'nn_hidden_neurons': 10,
        'nn_layers': 10,
        'nn_lstm_activation_function': "selu",
        'nn_lstm_n_prev': 4
    }

    # Train encoder
    encoder = Encoder(model_params)
    docs = encoder.load_documents(model_params['encoder_training_docs'])
    encoder.set_documents(docs)
    encoder.train()

    # Define note mapper for MIDI file loading
    note_mapping_config_path = "../settings/map-to-group.json"
    note_mapper = NoteMapper(note_mapping_config_path)

    # Define training documents for sequence learning
    training_docs = [
        "/Users/taylorpeer/Projects/se-project/midi-embeddings/data/corpora/test/training"
    ]  # TODO paths...

    # Define evaluation documents for sequence learning
    evaluation_docs = []
    evaluation_docs.append(
        "/Users/taylorpeer/Projects/se-project/midi-embeddings/data/corpora/test/test"
    )  # TODO paths...

    # Load training MIDI files using MidiDataLoader
    data_loader = MidiDataLoader(note_mapper,
                                 params=model_params,
                                 encoder=encoder)
    training_data = data_loader.load_data_as_array(training_docs)

    # Set fit_scaler=False to re-use scaler from training set
    test_data = data_loader.load_data_as_array(evaluation_docs,
                                               fit_scaler=False)
    (x_test, y_test) = test_data

    # Train sequence learning model
    sequence_model = GenerativeSequenceLearner(model_params)
    sequence_model.train(training_data)

    # Apply trained model to test set
    predicted = sequence_model.predict(x_test)

    # Evaluate accuracy of model on test set
    evaluator = Evaluator()
    average_error = evaluator.compute_average_error(predicted, y_test)

    # Un-scale predicted and actual values
    scaler = data_loader.get_scaler()
    predicted = scaler.inverse_transform(predicted)
    y_test = scaler.inverse_transform(y_test)

    # Convert predicted vectors to note sequence
    predicted_notes = encoder.convert_feature_vectors_to_text(predicted)

    # Convert actual vectors to note sequence
    actual_notes = encoder.convert_feature_vectors_to_text(y_test)

    # Compute accuracy by measuring precision/recall of predicted vs. actual notes at every timestep of the evaluation set
    (precision, recall,
     f1) = evaluator.compute_seq_accuracy(predicted_notes, actual_notes)

    # Remove doc2vec_docs params setting, since otherwise params can't be printed
    model_params = dict((key, value) for key, value in model_params.items()
                        if key != 'doc2vec_docs')

    print(str(model_params))
    print("- precision: " + str(precision))
    print("- recall: " + str(recall))
    print("- f1: " + str(f1))
    print("- average error: " + str(average_error))
    print("---")
Example #12
def main():
    # Documents used to train semantic encoder model
    #encoder_training_docs = "../../midi-embeddings/data/full_1_measure.txt"
    encoder_training_docs = "../resources/encoder_training_docs/full_1_measure_20k.txt"

    model_params = {

        # Encoder (doc2vec) settings:
        'doc2vec_docs': encoder_training_docs,
        'doc2vec_dm': 1,
        'doc2vec_dm_mean': 1,
        'doc2vec_epochs': 2,
        'doc2vec_hs': 0,
        'doc2vec_learning_rate_start': 0.025,
        'doc2vec_learning_rate_end': 0.2,
        'doc2vec_min_count': 10,
        'doc2vec_negative': 0,
        'doc2vec_vector_size': 20,  # 24,
        'doc2vec_window': 10,  # 3,

        # Sequence learning (Keras LSTM) settings:
        'nn_features': ['bpm', 'measure', 'beat'],
        'nn_batch_size': 15,
        'nn_dense_activation_function': "linear",
        'nn_dropout': 0.1,
        'nn_epochs': 75,
        'nn_hidden_neurons': 30,  # 30,
        'nn_layers': 20,  # 15,
        'nn_lstm_activation_function': "selu",
        'nn_lstm_n_prev': 16,
        'nn_loss': 'mean_absolute_error',
        'nn_optimizer': 'rmsprop'
    }

    # Train encoder
    encoder = Encoder(model_params)
    docs = encoder.load_documents(model_params['doc2vec_docs'])
    encoder.set_documents(docs)
    encoder.train()

    # Define note mapper for MIDI file loading
    note_mapping_config_path = "../settings/map-to-group.json"
    note_mapper = NoteMapper(note_mapping_config_path)

    # Define training documents for sequence learning
    training_docs = ["../resources/midi/breakbeats"]

    # Load training MIDI files using MidiDataLoader
    data_loader = MidiDataLoader(note_mapper,
                                 params=model_params,
                                 encoder=encoder)
    training_data = data_loader.load_data_as_array(training_docs)

    # Train sequence learning model
    sequence_model = GenerativeSequenceLearner(model_params)
    sequence_model.train(training_data)

    # TODO select seed sequence for training
    seed_sequences = [
        "../resources/midi/breakbeats/084 Breakthru.mid",
        "../resources/midi/breakbeats/086 Clouds.mid",
        "../resources/midi/breakbeats/089 Get Out.mid",
        "../resources/midi/breakbeats/089 Wrong.mid",
        "../resources/midi/breakbeats/090 Deceive.mid",
        "../resources/midi/breakbeats/090 New York.mid",
        "../resources/midi/breakbeats/090 Radio.mid",
        "../resources/midi/breakbeats/093 Pretender.mid",
        "../resources/midi/breakbeats/093 Right Won.mid",
        "../resources/midi/breakbeats/094 Run.mid"
    ]

    sequence_generator = SequenceGenerator(data_loader, sequence_model)
    length = 64

    for seq_index, seed in enumerate(seed_sequences):
        generated_seq_df = sequence_generator.generate(seed, length)

        writer = MidiWriter(note_mapper)
        save_to_path = "test_seq_" + str(seq_index) + ".mid"
        writer.convert_to_midi(generated_seq_df, save_to_path)
        print("---")

        print(generated_seq_df.to_string())