def main():
    """Read a message from stdin, encode it, and print table and result."""
    encoder = Encoder()
    text = input("Type message to encode: ").lower()
    encoded_message, encoding_table = encoder.encode_message(text)
    print("Encoding table :")
    print(encoding_table)
    print("Encoded message :")
    print(encoded_message)
def test_train_doc2vec_model(self):
    """Train a tiny doc2vec encoder on the test corpus and sanity-check
    that its vocabulary words have the expected token shape.

    Fix: removed a stray dead ``pass`` statement in the middle of the body
    and a useless bare ``return`` at the end.
    """
    # Minimal, fast hyperparameters — one epoch, one-dimensional vectors.
    test_model_params = {
        "doc2vec_dm": 1,
        "doc2vec_dm_mean": 1,
        "doc2vec_epochs": 1,
        "doc2vec_hs": 0,
        "doc2vec_learning_rate_start": 0.025,
        "doc2vec_learning_rate_end": 0.01,
        "doc2vec_min_count": 2,
        "doc2vec_negative": 0,
        "doc2vec_vector_size": 1,
        "doc2vec_window": 1
    }

    # Train encoder
    encoder = Encoder(test_model_params)
    print(encoder.generate_id(test_model_params))
    docs = encoder.load_documents("resources/encoding/test_docs.line")
    encoder.set_documents(docs)
    encoder.train()

    # Check model returns a random word containing 2 underscores
    # (instrument_note_duration)
    random_word = random.choice(encoder.get_word_vectors().index2word)
    self.assertTrue(random_word.count("_") == 2)

    # TODO test convert_vector_to_text
def outstream(self, buf):
    """Synthesize output stream, using DBPSK modulation of carrier signals

    Chunks of synthesized signal will be put on an output queue.
    When output stream is finished, False is put on queue.

    Keyword arguments:
    buf -- an output Queue
    """
    # Differential encoding needs a phase reference: emit a 0b00 byte first.
    phase_shifts = np.zeros(8, dtype=np.int8)
    phase_shifts, chunk = self.__encode_byte(phase_shifts, 0b00, -1)
    buf.put(chunk)

    # Encoded payload, terminated with a trailing 0b00 byte.
    frame = Encoder(self.payload).encode() + [0b00]
    for byte_index, byte_value in enumerate(frame):
        phase_shifts, chunk = self.__encode_byte(phase_shifts, byte_value, byte_index)
        buf.put(chunk)

    # Sentinel: consumers stop reading when they see False.
    buf.put(False)
    return buf
def _get_test_encoder(self):
    """
    Trains and returns a test encoder module, prepared from test documents.

    :return: trained encoder module for testing.
    """
    test_encoder = Encoder(self.test_params)
    documents = test_encoder.load_documents("resources/encoding/test_docs.line")
    test_encoder.set_documents(documents)
    test_encoder.train()
    return test_encoder
def compress(quantized): dct = matrix(calcDCT()) quality_matrix = quality(99) allSquares = [] for index in range(len(quantized)): quantize_matrix = matrix(quantized[index]) dct_perform = performDCT(quantize_matrix, dct) final_dct = quality_divide(quality_matrix, dct_perform) square = intoList(final_dct) # allSquares.append(bytearray(str(square))) nonzeros = nonZeros(square) allSquares.append(nonzeros) # if(len(nonzeros) > 0): # allSquares += nonzeros print square dump(allSquares, open("compressed.jv", "wb")) enc = Encoder("compressed.jv") os.remove("compressed.jv") enc.write("compressed.jvad")
def load_model_corpora(checkpoint):
    """
    Load the model the checkpoint pointed at by `checkpoint' is for and the
    corpora indicated in the arguments within the checkpoint.

    :param checkpoint: value accepted by ``load_checkpoint`` (presumably a
        file path — confirm against caller).
    :return: ``(model, corpora)`` tuple; ``corpora`` is None when
        ``args.load`` is falsy.
    :raises RuntimeError: when the loaded checkpoint lacks 'args'/'params'.
    """
    try:
        # NOTE(review): checkpoint/pickle loading executes arbitrary code on
        # untrusted input — only load trusted files.
        checkpoint = load_checkpoint(checkpoint)
        args = checkpoint['args']
        params = checkpoint['params']
    except Exception as e:
        print('The following exception ocurred:')
        print(e)
        raise RuntimeError('The first object in checkpoint must be a '
                           'dictionary containing at least [args,params].')
    # Use the arguments to create a model that is the same as the one we have
    # the parameters for.
    if args.load:
        # Restore the stored vocabulary/vectors to rebuild the same corpus.
        with open(args.load, 'rb') as f:
            stored_dict = pickle.load(f)
        corpora = Corpus(args.corpus, load=True,
                         vocab=stored_dict['vocabulary'],
                         vectors=stored_dict['vectors'])
    else:
        # I never do load = False.
        corpora = None
    # Older checkpoints may predate the 'old_model' flag; default it off.
    if not hasattr(args, 'old_model'):
        args.old_model = False
    if args.old_model:
        model = old_model('LSTM', len(corpora.vocab), args.encoder_size,
                          args.hidden_size, args.layers, args.dropout)
    else:
        # Fixed embedding size of 50 for the new-style encoder.
        encoder = Encoder(50, len(corpora.vocab), corpora.vectors)
        model = RNNModel(encoder.encoding_size, args.hidden_size,
                         len(corpora.vocab), args.layers, encoder,
                         dropout=args.dropout)
    # load the parameters from checkpoint
    model.load_state_dict(params)
    return model, corpora
def _get_encoder(self, params):
    """
    Retrieves encoder with given parameters, either from cache (if available)
    or by training a new model.

    Lookup order: in-memory cache -> on-disk cache -> train from scratch.
    (Refactored per the old TODO: duplicated construction code merged,
    else-after-return nesting flattened into guard clauses.)

    :param params: the encoder parameters (must contain 'doc2vec_docs').
    :return: the trained encoder model.
    """
    # Deterministic id derived from the parameters serves as the cache key.
    encoder_id = Encoder.generate_id(params)
    self._logger.debug("Retrieving encoder model: " + str(encoder_id))

    # 1) In-memory cache.
    if encoder_id in self._trained_encoders:
        self._logger.debug("Loading encoder from in-memory cache: " + str(encoder_id))
        return self._trained_encoders[encoder_id]

    # 2) On-disk cache (only when a cache directory is configured).
    prev_model = None
    if self._encoder_dir is not None:
        prev_model = Encoder.load_if_exists(self._encoder_dir, encoder_id)

    # Both remaining paths need a fresh wrapper with its documents attached.
    encoder = Encoder(params)
    docs = self._get_docs(encoder, params['doc2vec_docs'])
    encoder.set_documents(docs)

    if prev_model is not None:
        self._logger.debug("Loaded encoder from disk-cache: " + str(encoder_id))
        encoder.set_model(prev_model)
        self._trained_encoders[encoder_id] = encoder
        return encoder

    # 3) No cached model anywhere: train from scratch and cache the result.
    self._logger.debug("Training new encoder model: " + str(encoder_id))
    encoder.train()
    self._trained_encoders[encoder_id] = encoder
    self._logger.debug("Added encoder to cache: " + str(encoder_id))
    if self._encoder_dir is not None:
        encoder.save(self._encoder_dir + "/" + encoder_id)
    return encoder
from encoding import Encoder

# "hello" in ASCII, 5 bytes
payload = [104, 101, 108, 108, 111]

# Expected encoded single frame:
# [22, 22, 104, 101, 108, 108, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 210, 201, 10, 89, 213, 255]
stream = Encoder(payload).encode()
import pandas as pd
import numpy as np
from encoding import Encoder

# Toy dataset: two numeric features, two categorical features, one label.
df = pd.DataFrame({
    "feature1": [1, 57, 23, 7, 8, 0, 11, 54, 0, 1],
    "feature2": [1, 2, 3, 4, 5, 8, 7, 8, 9, 10],
    "feature3": ['Red', 'Blue', 'Red', 'Yellow', 'Blue', 'Blue', 'Yellow', 'Red', 'Yellow', 'Red'],
    "feature4": ['France', 'USA', 'USA', 'Canada', 'USA', 'Canada', 'France', 'Canada', 'USA', 'France'],
    "label": ['Yes', 'No', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes'],
})

labelCol = ['label']
# Every non-label object-dtype column is treated as categorical.
categoryCol = [col for col in df.columns.drop(labelCol) if df[col].dtype == 'object']

df_category = Encoder(data=df)
df_encoded = df_category.OneHotEncode(categoryCol=categoryCol)
df_encoded = df_category.LabelEncode(labelCol=labelCol)

print("Orignial Dataset: \n", df)
print("Encoded Dataset: \n", df_encoded)
else: # Load the pre-trained embeddings from gensim.models import KeyedVectors embeddings = KeyedVectors.load_word2vec_format(args.vectors_path, binary=True) # Load the corpora, find the vocabulary and what is in the embeddings. corpora = Corpus(args.corpus, embeddings) # Don't need the embeddings any longer. corpora has a copy of the relevant # vectors. del embeddings if args.old_model: model = old_model('LSTM', len(corpora.vocab), args.encoder_size, args.hidden_size, args.layers, args.dropout) else: encoder = Encoder(50, len(corpora.vocab), corpora.vectors) model = RNNModel(encoder.encoding_size, args.hidden_size, len(corpora.vocab), args.layers, encoder, dropout=args.dropout) criterion = torch.nn.CrossEntropyLoss() trainer = Trainer(model, corpora, criterion, device, logger, args.batch_size, args.seq_len, args.lr, args.log_interval, args.clip_grad) best_valid_loss = float("inf") for epoch in range(args.epochs): print('Time at the start of epoch {} is {}'.format( epoch, datetime.now()))
def main():
    """Train a doc2vec encoder and an LSTM sequence model on MIDI corpora,
    then report precision/recall/F1 and average error on a held-out test set.
    """
    # Documents used to train semantic encoder model
    encoder_training_docs = "../resources/encoder_training_docs/full_1_measure_20k.txt"

    model_params = {
        # Encoder (doc2vec) settings:
        'encoder_training_docs': encoder_training_docs,
        'doc2vec_dm': 1,
        'doc2vec_dm_mean': 1,
        'doc2vec_epochs': 1,
        'doc2vec_hs': 0,
        'doc2vec_learning_rate_start': 0.025,
        'doc2vec_learning_rate_end': 0.2,
        'doc2vec_min_count': 5,
        'doc2vec_negative': 0,
        'doc2vec_vector_size': 5,
        'doc2vec_window': 1,
        # Sequence learning (Keras LSTM) settings:
        'nn_features': ['bpm', 'measure', 'beat'],
        'nn_batch_size': 100,
        'nn_dense_activation_function': "linear",
        'nn_dropout': 0.05,
        'nn_epochs': 5,
        'nn_hidden_neurons': 10,
        'nn_layers': 10,
        'nn_lstm_activation_function': "selu",
        'nn_lstm_n_prev': 4
    }

    # Train encoder
    encoder = Encoder(model_params)
    docs = encoder.load_documents(model_params['encoder_training_docs'])
    encoder.set_documents(docs)
    encoder.train()

    # Define note mapper for MIDI file loading
    note_mapping_config_path = "../settings/map-to-group.json"
    note_mapper = NoteMapper(note_mapping_config_path)

    # Define training documents for sequence learning
    training_docs = ["/Users/taylorpeer/Projects/se-project/midi-embeddings/data/corpora/test/training"]  # TODO paths...

    # Define evaluation documents for sequence learning
    evaluation_docs = []
    evaluation_docs.append("/Users/taylorpeer/Projects/se-project/midi-embeddings/data/corpora/test/test")  # TODO paths...

    # Load training MIDI files using MidiDataLoader
    data_loader = MidiDataLoader(note_mapper, params=model_params, encoder=encoder)
    training_data = data_loader.load_data_as_array(training_docs)

    # Set fit_scaler=False to re-use scaler from training set
    test_data = data_loader.load_data_as_array(evaluation_docs, fit_scaler=False)
    (x_test, y_test) = test_data

    # Train sequence learning model
    sequence_model = GenerativeSequenceLearner(model_params)
    sequence_model.train(training_data)

    # Apply trained model to test set
    predicted = sequence_model.predict(x_test)

    # Evaluate accuracy of model on test set
    evaluator = Evaluator()
    average_error = evaluator.compute_average_error(predicted, y_test)

    # Un-scale predicted and actual values
    scaler = data_loader.get_scaler()
    predicted = scaler.inverse_transform(predicted)
    y_test = scaler.inverse_transform(y_test)

    # Convert predicted vectors to note sequence
    predicted_notes = encoder.convert_feature_vectors_to_text(predicted)

    # Convert actual vectors to note sequence
    actual_notes = encoder.convert_feature_vectors_to_text(y_test)

    # Compute accuracy by measuring precision/recall of predicted vs. actual notes at every timestamp of evaluation
    (precision, recall, f1) = evaluator.compute_seq_accuracy(predicted_notes, actual_notes)

    # Remove doc2vec_docs params setting, since otherwise params can't be printed
    # NOTE(review): this dict uses the key 'encoder_training_docs', not
    # 'doc2vec_docs' — confirm this filter targets the intended key.
    model_params = dict((key, value) for key, value in model_params.items() if key != 'doc2vec_docs')

    print(str(model_params))
    print("- precision: " + str(precision))
    print("- recall: " + str(recall))
    print("- f1: " + str(f1))
    print("- average error: " + str(average_error))
    print("---")
def main():
    """Train a doc2vec encoder and an LSTM sequence model on breakbeat MIDI
    files, then generate fixed-length sequences from seed files and write
    each one back out as a MIDI file.
    """
    # Documents used to train semantic encoder model
    #encoder_training_docs = "../../midi-embeddings/data/full_1_measure.txt"
    encoder_training_docs = "../resources/encoder_training_docs/full_1_measure_20k.txt"

    model_params = {
        # Encoder (doc2vec) settings:
        'doc2vec_docs': encoder_training_docs,
        'doc2vec_dm': 1,
        'doc2vec_dm_mean': 1,
        'doc2vec_epochs': 2,
        'doc2vec_hs': 0,
        'doc2vec_learning_rate_start': 0.025,
        'doc2vec_learning_rate_end': 0.2,
        'doc2vec_min_count': 10,
        'doc2vec_negative': 0,
        'doc2vec_vector_size': 20,  # 24,
        'doc2vec_window': 10,  # 3,
        # Sequence learning (Keras LSTM) settings:
        'nn_features': ['bpm', 'measure', 'beat'],
        'nn_batch_size': 15,
        'nn_dense_activation_function': "linear",
        'nn_dropout': 0.1,
        'nn_epochs': 75,
        'nn_hidden_neurons': 30,  # 30,
        'nn_layers': 20,  # 15,
        'nn_lstm_activation_function': "selu",
        'nn_lstm_n_prev': 16,
        'nn_loss': 'mean_absolute_error',
        'nn_optimizer': 'rmsprop'
    }

    # Train encoder
    encoder = Encoder(model_params)
    docs = encoder.load_documents(model_params['doc2vec_docs'])
    encoder.set_documents(docs)
    encoder.train()

    # Define note mapper for MIDI file loading
    note_mapping_config_path = "../settings/map-to-group.json"
    note_mapper = NoteMapper(note_mapping_config_path)

    # Define training documents for sequence learning
    training_docs = ["../resources/midi/breakbeats"]

    # Load training MIDI files using MidiDataLoader
    data_loader = MidiDataLoader(note_mapper, params=model_params, encoder=encoder)
    training_data = data_loader.load_data_as_array(training_docs)

    # Train sequence learning model
    sequence_model = GenerativeSequenceLearner(model_params)
    sequence_model.train(training_data)

    # TODO select seed sequence for training
    seed_sequences = ["../resources/midi/breakbeats/084 Breakthru.mid",
                      "../resources/midi/breakbeats/086 Clouds.mid",
                      "../resources/midi/breakbeats/089 Get Out.mid",
                      "../resources/midi/breakbeats/089 Wrong.mid",
                      "../resources/midi/breakbeats/090 Deceive.mid",
                      "../resources/midi/breakbeats/090 New York.mid",
                      "../resources/midi/breakbeats/090 Radio.mid",
                      "../resources/midi/breakbeats/093 Pretender.mid",
                      "../resources/midi/breakbeats/093 Right Won.mid",
                      "../resources/midi/breakbeats/094 Run.mid"]

    sequence_generator = SequenceGenerator(data_loader, sequence_model)

    # Generate one sequence of `length` steps per seed and save it as MIDI.
    length = 64
    for seq_index, seed in enumerate(seed_sequences):
        generated_seq_df = sequence_generator.generate(seed, length)
        writer = MidiWriter(note_mapper)
        save_to_path = "test_seq_" + str(seq_index) + ".mid"
        writer.convert_to_midi(generated_seq_df, save_to_path)
        print("---")
        print(generated_seq_df.to_string())