def main():
    parser = argparse.ArgumentParser(description='Decode packets from the Maxkon 433MHz weather station.')
    parser.add_argument('--file', '-f', help='Decode packets from a WAV file')
    parser.add_argument('--show-raw', '-r', action='store_true', help='Show raw packet data in hexadecimal')
    parser.add_argument('--print-on-times', '-t', action='store_true', help='Show the on times as reported by the PWMDecoder')
    args = parser.parse_args()

    decoder = Decoder(args.print_on_times)
    if args.file:
        # Suppress WAV file warnings
        warnings.filterwarnings('ignore')
        sampFreq, samples = wavfile.read(args.file)
        warnings.filterwarnings('default')
        packets = decoder.add_samples(samples.tolist())
        for packet in packets:
            print_packet(packet, args.show_raw)
    else:
        def audio_samples_ready(samples):
            packets = decoder.add_samples(samples)
            for packet in packets:
                print_packet(packet, args.show_raw)

        audio_in = AudioIn(audio_samples_ready)
        while True:
            time.sleep(0.1)
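# Hedged usage sketch: the script name below is hypothetical; the flags
# correspond to the argparse options defined in main() above.
#
#   python maxkon_decode.py --file capture.wav --show-raw
#
if __name__ == '__main__':
    main()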
def get_n_best_lists(initial_params, n, args):
    sys.stderr.write("Getting n best lists...\n")
    num_songs_translated = 0
    n_best_lists = {}
    i = 0
    f = open(args.training_songs, "r")
    for path in f:
        path = path.strip()
        if not path:
            continue
        training_song = converter.parse(path)
        num_songs_translated += 1
        transpose(training_song, "C")
        sys.stderr.write("transposed " + path + "\n")
        lm = LanguageModel(args.harmony, "%s/%s_language_model.txt" % (args.model_directory, args.harmony))
        tms = []
        melodies = args.melodies.split(",")
        for melody in melodies:
            phrases = "%s/%s_%s_translation_model_rhythm.txt" % (args.model_directory, melody, args.harmony)
            notes = "%s/%s_%s_translation_model.txt" % (args.model_directory, melody, args.harmony)
            tm = TranslationModel(melody, args.harmony, phrases, notes)
            tms.append(tm)
        d = Decoder([(melody, training_song.parts[melody]) for melody in melodies],
                    lm, tms,
                    tm_phrase_weight=initial_params[0],
                    tm_notes_weight=initial_params[1],
                    lm_weight=initial_params[2])
        try:
            hyps = d.decode(n)
            n_best_lists[path] = hyps
            sys.stderr.write("decoded " + path + "\n")
            i += 1
        except Exception as e:
            sys.stderr.write(str(e))
    return n_best_lists
def download_file_task(self, file_obj, item):
    funcName = "[Downloader.download_file_task]"
    log(6, funcName, 'Downloading file:', file_obj.name)
    decoder = Decoder(item, file_obj)
    try:
        Thread.AcquireLock(self.article_lock)
        for article_obj in file_obj.articles:
            self.article_queue.put(DownloadInfo(file_obj, article_obj, decoder, item))
            log(9, funcName, 'self.article_queue.qsize():', self.article_queue.qsize())
    except:
        log(1, funcName, 'Error adding articles to queue')
    finally:
        Thread.ReleaseLock(self.article_lock)
    log(7, funcName, 'Waiting for decoder to complete for file', file_obj.name)
    decoder.wait()
    log(7, funcName, 'downloaded filename:', decoder.filename, 'size:', len(decoder.data))
    #Core.storage.save(Core.storage.join_path(item.incoming_path, decoder.filename), decoder.data)
    #saver = Saver(item.incoming_path, decoder.filename, decoder.data)
    #saver.save()
    #log(7, funcName, 'saved file:', decoder.filename)
    #return (decoder.filename, decoder.decoded_data)
    if file_obj in item.nzb.rars:
        if item.nzb.rars[item.nzb.rars.index(file_obj)].name != decoder.filename:
            log(3, funcName, 'Updating item nzb rars file to', decoder.filename)
            item.nzb.rars[item.nzb.rars.index(file_obj)].name = decoder.filename
    if file_obj in item.nzb.pars:
        if item.nzb.pars[item.nzb.pars.index(file_obj)].name != decoder.filename:
            log(3, funcName, 'Updating item nzb pars file to', decoder.filename)
            item.nzb.pars[item.nzb.pars.index(file_obj)].name = decoder.filename
    item.save()
    return decoder.filename, decoder.data
def main():
    dbc = DBController()
    dec = Decoder([TLEDecoder(), TLEListDecoder()])
    dlc = None
    try:
        dlc = Downloader()
    except DownloaderError as e:
        print("failed to initialize downloader: " + str(e))
        sys.exit(1)
    for esat in dlc.get_data():
        sats = []
        try:
            sats = dec.decode(esat.fmt, esat.data)
        except DecoderError as e:
            print("failed to decode: " + str(e))
        try:
            for sat in sats:
                dbc.add(sat)
            dbc.sync()
        except DBError as e:
            print("failed to insert into db: " + str(e))
def parse_contents(f, bytes_to_read):
    '''
    Do complex reading of caption data from binary file.
    Return a list of statements and characters
    '''
    if DEBUG:
        print('going to read {bytes} bytes in binary file caption statement.'.format(bytes=bytes_to_read))
    statements = []
    bytes_read = 0
    # TODO: Check to see if decoder state is carried between packet processing.
    # Currently recreating the decoder (and therefore resetting its state)
    # on every packet payload processing. This may be incorrect.
    decoder = Decoder()
    line = ''
    while bytes_read < bytes_to_read:
        statement = decoder.decode(f)
        if statement:
            bytes_read += len(statement)
            statements.append(statement)
        #if isinstance(statement, code_set.Kanji) or isinstance(statement, code_set.Alphanumeric) \
        #        or isinstance(statement, code_set.Hiragana) or isinstance(statement, code_set.Katakana):
        #    if DEBUG:
        #        print(statement)  # just dump to stdout for now
        #    line += str(statement)
    #if len(line) > 0:
    #    print('{l}\n'.format(l=line))
    return statements
def decode(self):
    encoded_content = open(self.args.input, 'rb').read()
    mappings = json.loads(open(self.args.mappings, 'r').read())
    decoder = Decoder(encoded_content, mappings)
    raw_content = decoder.decode()
    with open(self.args.output, 'w') as f:
        f.write(raw_content)
class JsonUnmarshaler(object):
    """The top-level Unmarshaler used by the Reader for JSON payloads.

    While you may use this directly, it is strongly discouraged.
    """
    def __init__(self):
        self.decoder = Decoder()

    def load(self, stream):
        return self.decoder.decode(json.load(stream, object_pairs_hook=OrderedDict))

    def loadeach(self, stream):
        for o in sosjson.items(stream, object_pairs_hook=OrderedDict):
            yield self.decoder.decode(o)
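# Minimal usage sketch for JsonUnmarshaler: 'payload.json' is a hypothetical
# file name, and it assumes the Decoder used above maps the parsed JSON to
# plain Python values.
if __name__ == '__main__':
    with open('payload.json') as stream:
        value = JsonUnmarshaler().load(stream)
        print(value)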
class MsgPackUnmarshaler(object):
    """The top-level Unmarshaler used by the Reader for MsgPack payloads.

    While you may use this directly, it is strongly discouraged.
    """
    def __init__(self):
        self.decoder = Decoder()
        self.unpacker = msgpack.Unpacker(object_pairs_hook=OrderedDict)

    def load(self, stream):
        return self.decoder.decode(msgpack.load(stream, object_pairs_hook=OrderedDict))

    def loadeach(self, stream):
        for o in self.unpacker:
            yield self.decoder.decode(o)
def build(self):
    Window.size = (1024, 520)
    self.dec = Decoder(self.prop_set)
    Clock.schedule_interval(self.update_time, .5)
    Clock.schedule_interval(self.visualize, .4)
    thr = threading.Thread(target=self.get_candata)
    thr.setDaemon(True)
    thr.start()
def ric_decode(imgbuf):
    offset = 0
    boxType = None
    boxLen, boxType, payload = iso_media.read_box(imgbuf[offset:])
    if boxType != "FTYP" or payload != "RIC ":
        print >> sys.stderr, "Fishy file type!!!", boxType, payload
        return None
    offset += boxLen
    boxLen, boxType, payload = iso_media.read_box(imgbuf[offset:])
    if boxType != "ILOT":
        print >> sys.stderr, "No offset table???", boxType
        return None
    offset += boxLen
    layers = wrapper.unwrapLayers(imgbuf[offset:])
    outputImg = Decoder().decode(layers)
    output = StringIO()
    outputImg.save(output, "JPEG", quality=90)
    return output.getvalue()
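# Hedged usage sketch for ric_decode: 'input.ric' and 'output.jpg' are
# hypothetical paths; it assumes ric_decode() returns raw JPEG bytes, or None
# when the container checks above fail.
if __name__ == '__main__':
    with open('input.ric', 'rb') as f:
        jpeg_bytes = ric_decode(f.read())
    if jpeg_bytes is not None:
        with open('output.jpg', 'wb') as out:
            out.write(jpeg_bytes)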
def __init__(self, fname):
    self.fname = fname
    self.decoder = Decoder(self)
    self.breakpoints = {}
    self.breakpoint_conditions = {}
    self.prev_input = None
    self.hex_input_mode = False
    self.tracked_registers = set([SP])
    self.reset()
def decode(self, reader, writer):
    '''
    compute pseudo likelihoods for the testing set

    Args:
        reader: a feature reader object to read features to decode
        writer: a writer object to write likelihoods
    '''
    #create a decoder
    decoder = Decoder(self.dnn, self.input_dim, reader.max_input_length)

    #read the prior
    prior = np.load(self.conf['savedir'] + '/prior.npy')

    #start tensorflow session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True #pylint: disable=E1101
    with tf.Session(graph=decoder.graph, config=config):
        #load the model
        decoder.restore(self.conf['savedir'] + '/final')

        #feed the utterances one by one to the neural net
        while True:
            utt_id, utt_mat, looped = reader.get_utt()
            if looped:
                break

            #compute predictions
            output = decoder(utt_mat)

            #get state likelihoods by dividing by the prior
            output = output/prior

            #floor the values to avoid problems with log
            output = np.where(output == 0, np.finfo(float).eps, output)

            #write the pseudo-likelihoods in kaldi feature format
            writer.write_next_utt(utt_id, np.log(output))

    #close the writer
    writer.close()
def run(file_name):
    decoder = Decoder('{}.png'.format(file_name))
    pixel_lines = [
        create_pixels(decoder.Pixel, scanline, decoder.bytes_per_pixel)
        for scanline in decoder.decode()
    ]
    c = Classifier()
    color_lines = []
    for line in pixel_lines:
        colors = [
            c.classify(Point(pixel.red, pixel.green, pixel.blue, pixel.alpha))
            for pixel in line
        ]
        color_lines.append(colors)
    with open('{}_colors.json'.format(file_name), 'w') as file:
        file.write(json.dumps(color_lines))
def decodeAndSolve(image, showSolution=False):
    d = Decoder(image)
    d.decode()
    s = SuDoKu(d.puzzle)
    solution = s.solution()
    if showSolution:
        img = copy(d.puzzleImage)
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        for q, p in ((x, y)
                     for x in (i * 100 + 30 for i in range(9))
                     for y in (i * 100 + 70 for i in range(9))):
            # integer division keeps the grid indices as ints
            if ((q - 30) // 100, (p - 70) // 100) not in d.numberLocations:
                cv2.putText(
                    img,
                    str(solution[(q - 30) // 100][(p - 70) // 100]),
                    (q, p),
                    cv2.FONT_HERSHEY_PLAIN,
                    4,
                    (0, 150, 0),
                    thickness=6,
                )
        cv2.imshow("Solution - Press any key to exit.", img)
        cv2.waitKey(0)
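# Hedged usage sketch: 'sudoku.png' is a hypothetical input image; it assumes
# Decoder(image) accepts a file path and that decodeAndSolve() displays the
# solved grid when showSolution is True.
if __name__ == '__main__':
    decodeAndSolve('sudoku.png', showSolution=True)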
def simulate(self, mode=Decoder.SUM_PROD):
    """
    :param mode: The algorithm (sum-prod vs max-prod) to use in the decoder simulations
    :return:
    """
    self.mode = mode
    self.codewords = []
    self.decoded = []
    for var in self.variance_levels:
        codewords = []
        decoded = []
        transmissions = []
        decoder = Decoder(var, self.mode)
        for i in range(0, self.iterations):
            code = codeword.Codeword()
            codewords.append(code.codeword)
            decoded.append(decoder.decode(code.transmit(var)))
            transmissions.append(code.transmission)
        self.codewords.append(codewords)
        self.decoded.append(decoded)
        self.transmissions.append(transmissions)
def _predict_processing(self, predict_path, output_path):
    if isinstance(output_path, file):
        output_f = output_path
    else:
        if output_path == "stdout":
            output_f = sys.stdout
        else:
            output_f = open(output_path, "w")
    logging.info("set output %s " % (output_f.name))
    logging.info("reading instance from %s . predicting ." % (predict_path))
    for instance, separator_data in DatasetHandler.read_predict_data(predict_path):
        self.constrain.set_constrain_data(separator_data)
        predict_tags = Decoder.decode_for_predict(self.extractor, self.model, self.constrain, instance)
        segmented_line = self._processing_unigrams_and_tags2segmented_line(instance, predict_tags)
        output_f.write("%s" % ("".join([segmented_line, os.linesep])))
    if output_f is not sys.stdout:
        output_f.close()
    logging.info("predicting done.")
def _4training_evaluate_processing(self , dev_path) : nr_processing_right = 0 nr_gold = 0 nr_processing = 0 for instance in DatasetHandler.read_dev_data(dev_path) : unigrams , gold_tags = Segmentor._processing_one_segmented_WSAtom_instance2unigrams_and_tags(instance) predict_tags = Decoder.decode_for_predict(self.extractor , self.model , self.constrain , unigrams) gold_coor_seq = self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(gold_tags) predict_coor_seq = self.__innerfunc_4evaluate_generate_word_coordinate_sequence_from_tags(predict_tags) cur_nr_gold , cur_nr_processing , cur_nr_processing_right = ( self.__innerfunc_4evaluate_get_nr_gold_and_processing_and_processing_right(gold_coor_seq , predict_coor_seq) ) nr_gold += cur_nr_gold nr_processing += cur_nr_processing nr_processing_right += cur_nr_processing_right p , r , f = self.__innerfunc_4evaluate_calculate_prf(nr_gold , nr_processing , nr_processing_right) print >>sys.stderr , ("Eval result :\np : %.2f%% r : %.2f%% f : %.2f%%\n" "total word num : %d total predict word num : %d predict right num : %d ")%( p * 100 , r * 100, f * 100 , nr_gold , nr_processing , nr_processing_right ) return f
def encode_decode(self, k):
    print "\nTesting encoding and then decoding with k = %s" % k
    md5 = hashlib.md5()
    with FileChunker(k, SYMBOLSIZE, DEFAULT_FILE) as chunker:
        chunk = chunker.chunk()
        while chunk:
            padding = chunk.padding
            symbols = [(i, chunk[i]) for i in xrange(k)]
            encoder = Encoder(k, symbols)
            symbols = []
            # Start at k/2 and produce 1.25k more symbols to get a mix
            # of parity and source symbols
            for i in xrange(k * 2):
                symbols.append(encoder.next())
            encoder = None

            decoder = Decoder(k)
            for tup in symbols:
                decoder.append(tup)
            decoder.decode()

            decoded = bytearray()
            for i in xrange(k):
                esi, s = decoder.next()
                decoded += s.tostring()
            decoder = None

            if padding:
                padding = 0 - padding
                print "Removing padding", padding, "bytes"
                decoded = decoded[:padding]

            md5.update(decoded)
            # Continue on to the next chunk
            chunk = chunker.chunk()

    print "Original digest:", self.original_digest
    print "Decoded digest:", md5.hexdigest()
    return self.original_digest == md5.hexdigest()
def main(infile, outfile, fromline=0):
    fin = open(infile, 'r')
    fout = open(outfile, 'w')
    fseek = open(log_path + '/db_progress', 'w')
    fout.write("USE %s;\n" % database)
    # read in file
    i = 0  # line counter
    for l in fin:
        if i < fromline:
            i += 1
            continue  # skip lines before fromline
        if 'OK' in l:
            date = l[0:10]
            time = l[11:19]
            timestamp = "'" + date + ' ' + time + "'"
            packet = l[28:].strip()
            (nid, data) = Decoder.decode(packet)
            if data:
                if nid in nodes:
                    table = nodes[nid]['db']['table']
                    fieldstring = ', '.join(table['fields'])
                    stringdata = [timestamp]
                    for d in data:
                        stringdata.append("'" + str(d) + "'")
                    valstring = ', '.join(stringdata)
                    fout.write("INSERT INTO %s (%s) VALUES (%s);\n" % (table['name'], fieldstring, valstring))
        i += 1
    fseek.write(str(i))  # write last line to file
    fin.close()
    fout.close()
    fseek.close()
def __init__(self, N=50):
    Decoder.__init__(self)
    self.N = N
class AttentiveNP(): """ The Attentive Neural Process model. """ def __init__(self, x_size, y_size, r_size, det_encoder_hidden_size, det_encoder_n_hidden, lat_encoder_hidden_size, lat_encoder_n_hidden, decoder_hidden_size, decoder_n_hidden, lr, attention_type): """ :param x_size: An integer describing the dimensionality of the input x :param y_size: An integer describing the dimensionality of the target variable y :param r_size: An integer describing the dimensionality of the embedding / context vector r :param det_encoder_hidden_size: An integer describing the number of nodes per hidden layer in the deterministic encoder NN :param det_encoder_n_hidden: An integer describing the number of hidden layers in the deterministic encoder neural network :param lat_encoder_hidden_size: An integer describing the number of nodes per hidden layer in the latent encoder neural NN :param lat_encoder_n_hidden: An integer describing the number of hidden layers in the latent encoder neural network :param decoder_hidden_size: An integer describing the number of nodes per hidden layer in the decoder neural network :param decoder_n_hidden: An integer describing the number of hidden layers in the decoder neural network :param lr: The optimiser learning rate. :param attention_type: The type of attention to be used. A string, either "multihead", "laplace", "uniform", "dot_product" """ self.r_size = r_size self.det_encoder = DeterministicEncoder(x_size, y_size, r_size, det_encoder_n_hidden, det_encoder_hidden_size, self_att=True, cross_att=True, attention_type=attention_type) self.lat_encoder = LatentEncoder((x_size + y_size), r_size, lat_encoder_n_hidden, lat_encoder_hidden_size, self_att=True) self.decoder = Decoder((x_size + r_size + r_size), y_size, decoder_n_hidden, decoder_hidden_size) self.optimiser = optim.Adam(list(self.det_encoder.parameters()) + list(self.lat_encoder.parameters()) + list(self.decoder.parameters()), lr=lr) def train(self, x_trains, y_trains, x_tests, y_tests, x_scalers, y_scalers, batch_size, iterations, testing, plotting, dataname, print_freq): """ :param x_trains: A np.array with dimensions [N_functions, [N_train, x_size]] containing the training data (x values) :param y_trains: A np.array with dimensions [N_functions, [N_train, y_size]] containing the training data (y values) :param x_tests: A tensor with dimensions [N_functions, [N_test, x_size]] containing the test data (x values) :param y_tests: A tensor with dimensions [N_functions, [N_test, y_size]] containing the test data (y values) :param x_scalers: The standard scaler used when testing == True to convert the x values back to the correct scale. :param y_scalers: The standard scaler used when testing == True to convert the predicted y values back to the correct scale. :param batch_size: An integer describing the number of times we should sample the set of context points used to form the aggregated embedding during training, given the number of context points to be sampled N_context. When testing this is set to 1 :param iterations: An integer, describing the number of iterations. 
In this case it also corresponds to the number of times we sample the number of context points N_context :param testing: A Boolean object; if set to be True, then every 30 iterations the R^2 score and RMSE values will be calculated and printed for both the train and test data :param print_freq: :param dataname: :param plotting: :return: """ n_functions = len(x_trains) for iteration in range(iterations): self.optimiser.zero_grad() # Sample the function from the set of functions idx_function = np.random.randint(n_functions) x_train = x_trains[idx_function] y_train = y_trains[idx_function] max_target = x_train.shape[0] # During training, we sample n_target points from the function, and # randomly select n_context points to condition on. num_target = torch.randint(low=5, high=int(max_target), size=(1,)) num_context = torch.randint(low=3, high=int(num_target), size=(1,)) idx = [np.random.permutation(x_train.shape[0])[:num_target] for i in range(batch_size)] idx_context = [idx[i][:num_context] for i in range(batch_size)] x_target = [x_train[idx[i], :] for i in range(batch_size)] y_target = [y_train[idx[i], :] for i in range(batch_size)] x_context = [x_train[idx_context[i], :] for i in range(batch_size)] y_context = [y_train[idx_context[i], :] for i in range(batch_size)] x_target = torch.stack(x_target) y_target = torch.stack(y_target) x_context = torch.stack(x_context) y_context = torch.stack(y_context) # The deterministic encoder outputs the deterministic embedding r. r = self.det_encoder.forward(x_context, y_context, x_target) # [batch_size, N_target, r_size] # The latent encoder outputs a prior distribution over the # latent embedding z (conditioned only on the context points). z_priors, _, _ = self.lat_encoder.forward(x_context, y_context) z_posteriors, _, _ = self.lat_encoder.forward(x_target, y_target) # Sample z from the prior distribution. zs = [dist.rsample() for dist in z_priors] # [batch_size, r_size] z = torch.cat(zs) z = z.view(-1, self.r_size) # The input to the decoder is the concatenation of the target x values, # the deterministic embedding r and the latent variable z # the output is the predicted target y for each value of x. dists, _, _ = self.decoder.forward(x_target.float(), r.float(), z.float()) # Calculate the loss log_ps = [dist.log_prob(y_target[i, ...].float()) for i, dist in enumerate(dists)] log_ps = torch.cat(log_ps) kl_div = [kl_divergence(z_posterior, z_prior).float() for z_posterior, z_prior in zip(z_posteriors, z_priors)] kl_div = torch.cat(kl_div) loss = -(torch.mean(log_ps) - torch.mean(kl_div)) self.losslogger = loss # The loss should generally decrease with number of iterations, though it is not # guaranteed to decrease monotonically because at each iteration the set of # context points changes randomly. if iteration % print_freq == 0: print("Iteration " + str(iteration) + ":, Loss = {:.3f}".format(loss.item())) # We can set testing = True if we want to check that we are not over-fitting. if testing: metrics_calculator(x_trains, y_trains, x_tests, y_tests, x_scalers, y_scalers, self.predict, dataname, plotting, iteration) loss.backward() self.optimiser.step() def predict(self, x_context, y_context, x_target): """ :param x_context: A tensor of dimensions [batch_size, N_context, x_size]. 
When training N_context is randomly sampled between 3 and N_train; when testing N_context = N_train :param y_context: A tensor of dimensions [batch_size, N_context, y_size] :param x_target: A tensor of dimensions [N_target, x_size] :return dist: The distributions over the predicted outputs y_target :return mu: A tensor of dimensionality [batch_size, N_target, output_size] describing the means of the normal distribution. :return var: A tensor of dimensionality [batch_size, N_target, output_size] describing the variances of the normal distribution. """ r = self.det_encoder.forward(x_context, y_context, x_target) # The latent encoder outputs a distribution over the latent embedding z. dists_z, _, _ = self.lat_encoder.forward(x_context, y_context) zs = [dist.sample() for dist in dists_z] # [batch_size, r_size] z = torch.cat(zs) z = z.view(-1, self.r_size) # The input to the decoder is the concatenation of the target x values, # the deterministic embedding r and the latent variable z # the output is the predicted target y for each value of x. dist, mu, sigma = self.decoder.forward(x_target.float(), r.float(), z.float()) return dist, mu, sigma
encoder = Encoder(args.d_input * args.LFR_m, args.n_layers_enc, args.n_head,
                  args.d_k, args.d_v, args.d_model, args.d_inner,
                  dropout=args.dropout, pe_maxlen=args.pe_maxlen)
decoder = Decoder(sos_id, eos_id, vocab_size,
                  args.d_word_vec, args.n_layers_dec, args.n_head,
                  args.d_k, args.d_v, args.d_model, args.d_inner,
                  dropout=args.dropout,
                  tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
                  pe_maxlen=args.pe_maxlen)
model = Transformer(encoder, decoder)

for i in range(3):
    print("\n***** Utt", i + 1)
    Ti = i + 20
    input = torch.randn(Ti, D)
    length = torch.tensor([Ti], dtype=torch.int)
    nbest_hyps = model.recognize(input, length, char_list, args)
def train(log_dir, n_epochs, network_dict, index2token, **kwargs): onehot_words = kwargs['onehot_words'] word_pos = kwargs['word_pos'] sentence_lens_nchars = kwargs['sentence_lens_nchars'] sentence_lens_nwords = kwargs['sentence_lens_nwords'] vocabulary_size = kwargs['vocabulary_size'] max_char_len = kwargs['max_char_len'] onehot_words_val = kwargs['onehot_words_val'] word_pos_val = kwargs['word_pos_val'] sentence_lens_nchars_val = kwargs['sentence_lens_nchars_val'] sentence_lens_nwords_val = kwargs['sentence_lens_nwords_val'] batch_size = kwargs['batch_size'] input_size = vocabulary_size hidden_size = kwargs['hidden_size'] decoder_dim = kwargs['decoder_dim'] decoder_units_p3 = kwargs['decoder_units_p3'] num_batches = len(onehot_words) // batch_size network_dict['input_size'] = input_size max_word_len = np.max(sentence_lens_nwords) encoder_k = encoder.Encoder(**network_dict) #onehot_words,word_pos,vocabulary_size = encoder_k.run_preprocess() #prepping permutation matrix for all instances seperately perm_mat, max_lat_word_len, lat_sent_len_list = prep_perm_matrix( batch_size=batch_size, word_pos_matrix=word_pos, max_char_len=max_char_len) #placeholders mask_kl_pl = tf.placeholder(name='kl_pl_mask', dtype=tf.float32, shape=[batch_size, max_lat_word_len]) sent_word_len_list_pl = tf.placeholder(name='word_lens', dtype=tf.int32, shape=[batch_size]) perm_mat_pl = tf.placeholder(name='perm_mat_pl', dtype=tf.int32, shape=[batch_size, max_lat_word_len]) onehot_words_pl = tf.placeholder( name='onehot_words', dtype=tf.float32, shape=[batch_size, max_char_len, vocabulary_size]) word_pos_pl = tf.placeholder(name='word_pos', dtype=tf.float32, shape=[batch_size, max_char_len]) sent_char_len_list_pl = tf.placeholder(name='sent_char_len_list', dtype=tf.float32, shape=[batch_size]) #decoder arg_dict = { 'decoder_p3_units': decoder_units_p3, 'encoder_dim': hidden_size, 'lat_word_dim': hidden_size, 'sentence_lens': None, 'global_lat_dim': hidden_size, 'batch_size': batch_size, 'max_num_lat_words': max_lat_word_len, 'decoder_units': decoder_dim, 'num_sentence_characters': max_char_len, 'dict_length': vocabulary_size } decoder = Decoder(**arg_dict) #step counter global_step = tf.Variable(0, name='global_step', trainable=False) word_state_out, mean_state_out, logsig_state_out = encoder_k.run_encoder( sentence_lens=sent_char_len_list_pl, train=True, inputs=onehot_words_pl, word_pos=word_pos_pl, reuse=None) #picking out our words #why do these all start at 0? # replace 0's possibly with len+1 ## RELYING ON THERE BEING NOTHING AT ZEROS #indice 0 problem? 
word_state_out.set_shape([max_char_len, batch_size, hidden_size]) mean_state_out.set_shape([max_char_len, batch_size, hidden_size]) logsig_state_out.set_shape([max_char_len, batch_size, hidden_size]) word_state_out_p = permute_encoder_output(encoder_out=word_state_out, perm_mat=perm_mat_pl, batch_size=batch_size, max_word_len=max_lat_word_len) mean_state_out_p = permute_encoder_output(encoder_out=mean_state_out, perm_mat=perm_mat_pl, batch_size=batch_size, max_word_len=max_lat_word_len) logsig_state_out_p = permute_encoder_output(encoder_out=logsig_state_out, perm_mat=perm_mat_pl, batch_size=batch_size, max_word_len=max_lat_word_len) #Initialize decoder ##Note to self: need to input sentence lengths vector, also check to make sure all the placeholders flow into my class and tensorflow with ease out_o, global_latent_o, global_logsig_o, global_mu_o = decoder.run_decoder( word_sequence_length=sent_word_len_list_pl, train=True, reuse=None, units_lstm_decoder=decoder_dim, lat_words=word_state_out_p, units_dense_global=decoder_dim, char_sequence_length=tf.cast(sent_char_len_list_pl, dtype=tf.int32)) # shaping for batching #reshape problem onehot_words = np.reshape( onehot_words, newshape=[-1, batch_size, max_char_len, vocabulary_size]) word_pos = np.reshape(word_pos, newshape=[-1, batch_size, max_char_len]) # making word masks for kl term kl_mask = [] print(sentence_lens_nwords) for word_len in np.reshape(lat_sent_len_list, -1): vec = np.zeros([max_lat_word_len], dtype=np.float32) vec[0:word_len] = np.ones(shape=word_len, dtype=np.float32) kl_mask.append(vec) kl_mask = np.asarray(kl_mask) kl_mask = np.reshape(kl_mask, newshape=[-1, batch_size, max_lat_word_len]) sentence_lens_nwords = np.reshape(sentence_lens_nwords, newshape=[-1, batch_size]) sentence_lens_nchars = np.reshape(sentence_lens_nchars, newshape=[-1, batch_size]) lat_sent_len_list = np.reshape(lat_sent_len_list, [-1, batch_size]) #shaping for validation set batch_size_val = batch_size n_valid = np.shape(onehot_words_val)[0] r = n_valid % batch_size_val n_valid_use = n_valid - r #might have to fix this before reporting results onehot_words_val = np.reshape( onehot_words_val[0:n_valid_use, ...], newshape=[-1, batch_size_val, max_char_len, vocabulary_size]) word_pos_val = np.reshape(word_pos_val[0:n_valid_use, ...], newshape=[-1, batch_size_val, max_char_len]) #sentence_lens_nwords_val = np.reshape(sentence_lens_nwords_val[0:n_valid_use],newshape=[-1,batch_size_val]) sentence_lens_nchars_val = np.reshape( sentence_lens_nchars_val[0:n_valid_use], newshape=[-1, batch_size_val]) ###KL annealing parameters shift = 5000 total_steps = np.round(np.true_divide(n_epochs, 20) * np.shape(onehot_words)[0], decimals=0) #### cost, reconstruction, kl_p3, kl_p1, kl_global, kl_p2, anneal, _ = decoder.calc_cost( eow_mask=None, mask_kl=mask_kl_pl, kl=True, sentence_word_lens=sent_word_len_list_pl, shift=shift, total_steps=total_steps, global_step=global_step, global_latent_sample=global_latent_o, global_logsig=global_logsig_o, global_mu=global_mu_o, predictions=out_o, true_input=onehot_words_pl, posterior_logsig=logsig_state_out_p, posterior_mu=mean_state_out_p, post_samples=word_state_out_p, reuse=None) ###### # Train Step # clipping gradients ###### lr = 1e-4 opt = tf.train.AdamOptimizer(lr) grads_t, vars_t = zip(*opt.compute_gradients(cost)) clipped_grads_t, grad_norm_t = tf.clip_by_global_norm(grads_t, clip_norm=5.0) train_step = opt.apply_gradients(zip(clipped_grads_t, vars_t), global_step=global_step) regex = re.compile('[^a-zA-Z]') #sum_grad_hist = 
[tf.summary.histogram(name=regex.sub('',str(j)),values=i) for i,j in zip(clipped_grads_t,vars_t)] norm_grad = tf.summary.scalar(name='grad_norm', tensor=grad_norm_t) ###### #testing stuff #testing pls sent_word_len_list_pl_val = tf.placeholder(name='word_lens_val', dtype=tf.int32, shape=[batch_size]) perm_mat_pl_val = tf.placeholder(name='perm_mat_val', dtype=tf.int32, shape=[batch_size, max_lat_word_len]) onehot_words_pl_val = tf.placeholder( name='onehot_words_val', dtype=tf.float32, shape=[batch_size, max_char_len, vocabulary_size]) word_pos_pl_val = tf.placeholder(name='word_pos_val', dtype=tf.float32, shape=[batch_size, max_char_len]) sent_char_len_list_pl_val = tf.placeholder(name='sent_char_len_list_val', dtype=tf.float32, shape=[batch_size]) #testing graph word_state_out_val, mean_state_out_val, logsig_state_out_val = encoder_k.run_encoder( sentence_lens=sent_char_len_list_pl_val, train=False, inputs=onehot_words_pl_val, word_pos=word_pos_pl_val, reuse=True) perm_mat_val, _, lat_sent_len_list_val = prep_perm_matrix( batch_size=batch_size_val, word_pos_matrix=word_pos_val, max_char_len=max_char_len, max_word_len=max_lat_word_len) kl_mask_val = [] for word_len in np.reshape(lat_sent_len_list_val, -1): vec = np.zeros([max_lat_word_len], dtype=np.float32) vec[0:word_len] = np.ones(shape=word_len, dtype=np.float32) kl_mask_val.append(vec) kl_mask_val = np.asarray(kl_mask_val) kl_mask_val = np.reshape(kl_mask_val, newshape=[-1, batch_size, max_lat_word_len]) lat_sent_len_list_val = np.reshape(np.reshape(lat_sent_len_list_val, -1)[0:n_valid_use], newshape=[-1, batch_size_val]) word_state_out_val.set_shape([max_char_len, batch_size_val, hidden_size]) mean_state_out_val.set_shape([max_char_len, batch_size_val, hidden_size]) logsig_state_out.set_shape([max_char_len, batch_size_val, hidden_size]) word_state_out_p_val = permute_encoder_output( encoder_out=word_state_out_val, perm_mat=perm_mat_pl_val, batch_size=batch_size_val, max_word_len=max_lat_word_len) mean_state_out_p_val = permute_encoder_output( encoder_out=mean_state_out_val, perm_mat=perm_mat_pl_val, batch_size=batch_size_val, max_word_len=max_lat_word_len) logsig_state_out_p_val = permute_encoder_output( encoder_out=logsig_state_out_val, perm_mat=perm_mat_pl_val, batch_size=batch_size_val, max_word_len=max_lat_word_len) out_o_val, global_latent_o_val, global_logsig_o_val, global_mu_o_val = decoder.run_decoder( word_sequence_length=sent_word_len_list_pl_val, train=False, reuse=True, units_lstm_decoder=decoder_dim, lat_words=mean_state_out_p_val, units_dense_global=decoder.global_lat_dim, char_sequence_length=tf.cast(sent_char_len_list_pl_val, dtype=tf.int32)) #test cost test_cost = decoder.test_calc_cost( mask_kl=mask_kl_pl, sentence_word_lens=sent_word_len_list_pl_val, posterior_logsig=logsig_state_out_p_val, post_samples=word_state_out_p_val, global_mu=global_mu_o_val, global_logsig=global_logsig_o_val, global_latent_sample=global_latent_o_val, posterior_mu=mean_state_out_p_val, true_input=onehot_words_pl_val, predictions=out_o_val) ###### ###### #prior sampling samples = np.random.normal(size=[batch_size, decoder.global_lat_dim]) gen_samples = decoder.generation(samples=samples) sess = tf.InteractiveSession() sess.run(tf.global_variables_initializer()) ### IW eval NLL, bpc = n_samples_IW(n_samples=10, encoder=encoder_k, decoder=decoder, decoder_dim=decoder_dim, sent_char_len_list_pl=sent_char_len_list_pl_val, true_output=onehot_words_pl_val, onehot_words_pl=onehot_words_pl_val, word_pos_pl=word_pos_pl_val, 
perm_mat_pl=perm_mat_pl_val, batch_size=batch_size, max_lat_word_len=max_lat_word_len, sent_word_len_list_pl=sent_word_len_list_pl_val) sum_NLL = tf.summary.scalar(tensor=NLL, name='10sample_IWAE_LL') sum_bpc = tf.summary.scalar(tensor=bpc, name='10sample_IWAE_BPC') ### ###### #tensorboard stuff summary_inf_train = tf.summary.merge([ norm_grad, decoder.kls_hist, decoder.global_kl_scalar, decoder.rec_scalar, decoder.cost_scalar, decoder.full_kl_scalar, decoder.sum_all_activ_hist, decoder.sum_global_activ_hist ]) summary_inf_test = tf.summary.merge( [sum_NLL, sum_bpc, decoder.sum_rec_val, decoder.sum_kl_val]) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) ###### log_file = log_dir + "vaelog.txt" logger = logging.getLogger('mVAE_log') hdlr = logging.FileHandler(log_file) formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') hdlr.setFormatter(formatter) logger.addHandler(hdlr) logger.setLevel(logging.DEBUG) for epoch in range(n_epochs): inds = range(np.shape(onehot_words)[0]) np.random.shuffle(inds) for count, batch in enumerate(inds): anneal_c_o, train_predictions_o_np, train_cost_o_np, _, global_step_o_np, train_rec_cost_o_np, _, _, _, _, summary_inf_train_o = sess.run( [ anneal, out_o, cost, train_step, global_step, reconstruction, kl_p3, kl_p1, kl_global, kl_p2, summary_inf_train ], feed_dict={ mask_kl_pl: kl_mask[batch], onehot_words_pl: onehot_words[batch], word_pos_pl: word_pos[batch], perm_mat_pl: perm_mat[batch], sent_word_len_list_pl: lat_sent_len_list[batch], sent_char_len_list_pl: sentence_lens_nchars[batch] }) #logger.debug('anneal const {}'.format(anneal_c)) #logger.debug('ground truth {}'.format(get_output_sentences(index2token, ground_truth[0:10]))) if global_step_o_np % 1 == 0: # testing on the validation set rind = np.random.randint(low=0, high=np.shape(onehot_words_val)[-1]) val_predictions_o_np, val_cost_o_np, summary_inf_test_o = sess.run( [out_o_val, test_cost, summary_inf_test], feed_dict={ mask_kl_pl: kl_mask_val[rind], onehot_words_pl_val: onehot_words_val[rind], word_pos_pl_val: word_pos_val[rind], perm_mat_pl_val: perm_mat_val[rind], sent_word_len_list_pl_val: lat_sent_len_list_val[rind], sent_char_len_list_pl_val: sentence_lens_nchars_val[rind] }) predictions = np.argmax(train_predictions_o_np[0:10], axis=-1) ground_truth = np.argmax(onehot_words[batch][0:10], axis=-1) val_predictions = np.argmax(val_predictions_o_np, axis=-1) true = np.argmax(onehot_words_val[rind], -1) num = np.sum([ np.sum(val_predictions[j][0:i] == true[j][0:i]) for j, i in enumerate(sentence_lens_nchars_val[rind]) ]) denom = np.sum(sentence_lens_nchars_val[rind]) accuracy = np.true_divide(num, denom) * 100 logger.debug( 'accuracy on random val batch {}'.format(accuracy)) logger.debug('predictions {}'.format( [[index2token[j] for j in i] for i in predictions[0:10, 0:50]])) logger.debug('ground truth {}'.format( [[index2token[j] for j in i] for i in ground_truth[0:10, 0:50]])) logger.debug( 'global step: {} Epoch: {} count: {} anneal:{}'.format( global_step_o_np, epoch, count, anneal_c_o)) logger.debug('train cost: {}'.format(train_cost_o_np)) logger.debug('validation cost {}'.format(val_cost_o_np)) logger.debug('validation predictions {}'.format( [[index2token[j] for j in i] for i in val_predictions[0:10, 0:50]])) summary_writer.add_summary(summary_inf_test_o, global_step_o_np) summary_writer.flush() if global_step_o_np % 1000 == 0: # testing on the generative model gen_o_np = sess.run([gen_samples]) gen_pred = np.argmax(gen_o_np[0:10], axis=-1) logger.debug('GEN 
predictions {}'.format( [[index2token[j] for j in i] for i in gen_pred[0][0:10, 0:50]])) summary_writer.add_summary(summary_inf_train_o, global_step_o_np) summary_writer.flush()
    OP_SUB: 3, OP_MUL: 3, OP_DIV: 3, OP_MOD: 3,
    OP_AND: 3, OP_OR: 3, OP_LT: 3, OP_GT: 3, OP_EQ: 3,
    OP_NOT: 1, OP_JMP: 2, OP_STORE: 2, OP_ENDGA: 1, OP_NOP: 1
}

if __name__ == "__main__":
    from binary_reader import BinaryReader
    from decoder import Decoder
    from memory import Memory

    bin_instructions = BinaryReader.read_instructions("../../sdvu/cfg/adding.6.out.2")
    for instr in bin_instructions:
        print(hex(instr))

    memory = Memory(128, 0x22221111333333332222222200000001)
    simulator = Core(Decoder(bin_instructions), 2, 128)
    simulator.setup_cfg_memory(memory)
    simulator.process_instructions()
class Simulation: def __init__(self): self.init_corpus() def init_corpus(self): self.corpus = [] lines = open('corpus.txt').readlines() for i in range(20000): tags = lines[i].split(' ') word = tags[0] pri = float(tags[1]) self.corpus.append([word, pri]) def calc_letter_distribution(self, **kwargs): data_list = kwargs['data_list'] task_list = kwargs['task_list'] assert (len(data_list) == len(task_list)) QWERTY = ['QWERTYUIOP', 'ASDFGHJKL', 'ZXCVBNM'] self.letter_positions = [[-1, -1] for i in range(26)] self.letter_fingers = np.zeros((26, 10)) self.letter_distributions = [ [[-1, -1, 0.1, 0.1, 0.1, 0] for finger in range(10)] for alpha in range(26) ] # Formal = [xc, yc, std_x2, std_y2, std_xy, p] for r in range(3): line = QWERTY[r] for c in range(len(line)): ch = line[c] alpha = ord(ch) - ord('A') self.letter_positions[alpha] = [c, r] features = [[[] for finger in range(10)] for alpha in range(26)] for data, task in zip(data_list, task_list): assert (len(data) == len(task)) for i in range(len(task)): letter = task[i] if letter.isalpha(): alpha = ord(letter) - ord('a') feature = Decoder.get_feature(data[i]) finger = Decoder.get_finger(data[i]) # finger = 0 # If not using fingering model features[alpha][finger].append(feature) for alpha in range(26): for finger in range(10): points = np.array(features[alpha][finger]) if len(points) >= 1: self.letter_fingers[alpha][finger] += len(points) X = points[:, 0] Y = points[:, 1] if len(points) >= 5: # Remove > 3_std n_std = 3 xc, x_std = np.mean(X), np.std(X) yc, y_std = np.mean(Y), np.std(Y) pack = zip(X.copy(), Y.copy()) X = [] Y = [] for x, y in pack: if abs(x - xc) <= n_std * x_std and abs( y - yc) <= n_std * y_std: X.append(x) Y.append(y) xc = np.mean(X) yc = np.mean(Y) #plt.scatter(X, Y, color=('C'+str(alpha)), s = 5) #plt.scatter(xc, yc, color='red', s = 10) cov = np.array([[0.1, 0], [0, 0.1]]) if len(points) >= 5: cov = np.cov(np.array([X, Y])) std_x2 = cov[0, 0] std_y2 = cov[1, 1] std_xy = (std_x2**0.5) * (std_y2**0.5) p = cov[0, 1] / std_xy assert (not (np.isnan(std_x2) or np.isnan(std_y2) or np.isnan(std_xy))) self.letter_distributions[alpha][finger] = [ xc, yc, std_x2, std_y2, std_xy, p ] if sum(self.letter_fingers[alpha]) != 0: self.letter_fingers[alpha] /= sum(self.letter_fingers[alpha]) std_fingering = np.argmax(self.letter_fingers[alpha]) for finger in range(10): if self.letter_fingers[alpha][finger] == 0: self.letter_distributions[alpha][ finger] = self.letter_distributions[alpha][ std_fingering].copy() self.letter_fingers[alpha][finger] = max( self.letter_fingers[alpha][finger], 0.001) #plt.show() pickle.dump([ self.letter_positions, self.letter_fingers, self.letter_distributions ], open('models/touch.model', 'wb')) self.decoder = Decoder() def input(self): nums = sys.argv[1].split('-') assert (len(nums) == 2) if nums[0].isdigit(): users = [int(nums[0])] else: users = [1, 2, 3, 4, 5, 6, 8, 9, 10, 12] #range(1, 13) if nums[1].isdigit(): sessions = [int(nums[1])] else: sessions = range(1, 6) N = 20 task_list = [] inputted_list = [] data_list = [] for user in users: for session in sessions: folder_path = 'data-study1/' + str(user) + '-' + str( session) + '/' for i in range(N): file_path = folder_path + str(i) + '.pickle' if os.path.exists(file_path): [task, inputted, data] = pickle.load(open(file_path, 'rb')) assert (len(inputted) == len(data) and len(data) == len(data)) task_list.append(task) inputted_list.append(inputted) data_list.append(data) return task_list, inputted_list, data_list def run(self): task_list, inputted_list, 
data_list = self.input() self.calc_letter_distribution(data_list=data_list, task_list=task_list) ranks = [] for task, inputted, data in zip(task_list, inputted_list, data_list): words = task.split() begin = 0 for word in words: end = begin + len(word) enter = inputted[begin:end] word_data = data[begin:end] if enter == word: pred, rank = self.decoder.predict(word_data, task[:end], word) ranks.append(rank) begin = end + 1 print('===== Top-5 accuracy =====') ranks = np.array(ranks) probs = [] for i in range(5): #prob = sum(ranks == i+1) / len(ranks) prob = sum(ranks == i + 1) / sum(ranks != -1) print('Rank %d = %f' % (i + 1, prob)) if i == 0: TOP_1 = prob return TOP_1
BUFFER_SIZE = len(d_tensor_train)
steps_per_epoch = len(d_tensor_train) // BATCH_SIZE + 1
vocab_size = len(tokenizer.word_index) + 1

# create tf.dataset
dataset = tf.data.Dataset.from_tensor_slices(
    (dia_train, res_train, sid_train, aid_train)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=False)
example_input_batch, example_target_batch, example_sid_batch, example_aid_batch = next(
    iter(dataset))
print("Create dataset done.")

encoder = Encoder(HIDDEN_SIZE, vocab_size, embedding_dim, NUM_LAYER, BATCH_SIZE)
decoder = Decoder(HIDDEN_SIZE, vocab_size, embedding_dim, speaker_dim, NUM_LAYER)
optimizer = tf.keras.optimizers.Adam()

# train_nn_s = Train(encoder, decoder, optimizer, tokenizer)  # speaker mode
train_nn_sa = Train(encoder, decoder, optimizer, tokenizer)  # speaker-addressee mode

# cp = tf.train.Checkpoint(optimizer=optimizer,
#                          encoder=encoder,
#                          decoder=decoder)
# status = cp.restore("persona_checkpoint/speaker-ckpt-5")
# print(status)
# train_nn_s = Train(encoder, decoder, optimizer, tokenizer)  # speaker mode restore

print("Start training")

#################################### test Way ###########################################
    if parsed_args.port == []:
        parsed_args.port = [DEFAULT_TCP_PORT + i for i in range(num_bundles)]
    elif len(parsed_args.port) == 1 and 1 < num_bundles:
        initial_port = parsed_args.port[0]
        parsed_args.port = [initial_port + i for i in range(num_bundles)]
    elif len(parsed_args.port) != num_bundles:
        sys.stderr.write('ERROR: TCP ports specified incorrectly.\n')
        cli_parser.print_help()
        sys.exit(2)
    return parsed_args


api.add_resource(TranslationEngine, '/joshua/translate/<string:target_lang_code>')

if __name__ == '__main__':
    args = handle_cli_args(sys.argv)
    for idx, bundle_confs in enumerate(zip(args.bundle_dir, args.port)):
        bundle, port = bundle_confs
        decoder = Decoder(bundle, port)
        decoder.start_decoder_server()
        lang_pair = (args.source_lang[idx], args.target_lang[idx])
        decoders[lang_pair] = decoder
    app.run(debug=True, use_reloader=False)
    #app.run()
cli.connexion.close()
serv.th_Listen.join()
serv.mySocket.close()

# give the message associated with closing the client connection
# time to print correctly
sleep(0.1)

log.debug("Server listening thread removed.")
log.debug("Server removed.\n\n")

# # # -------------------------DATA DECODING TO IMAGE-------------------------- # # #

dec = Decoder()

# bitstream (received data) --> RLE frame
dec_rle_data = dec.decode_bitstream_RLE(received_data)
t_fin_conversion_bitstream_recu_RLE = time()
duree_conversion_bitstream_recu_RLE = t_fin_conversion_bitstream_recu_RLE - t_fin_conversion_RLE_bitstream_et_passage_reseau
log.debug(f"Network transmission successful: {str(rle_data == dec_rle_data).upper()}\n")

# RLE frame --> YUV frame
dec_yuv_data = dec.recompose_frame_via_iDTT(dec_rle_data, img_size, macroblock_size, P, S)
t_fin_conversion_RLE_YUV = time()
duree_conversion_RLE_YUV = t_fin_conversion_RLE_YUV - t_fin_conversion_bitstream_recu_RLE

# YUV frame --> RGB frame
dec_rgb_data = dec.YUV_to_RGB(np.array(dec_yuv_data, dtype=float))
sample_output, sample_hidden = ans_sent_encoder(example_ans_sent_batch, training=True) tf.debugging.assert_shapes([(sample_output, (BATCH_SIZE, max_length_inp, units))]) if layer == 1: tf.debugging.assert_shapes([(sample_hidden, (BATCH_SIZE, units))]) else: tf.debugging.assert_shapes([(sample_hidden, (layer, BATCH_SIZE, units))]) decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE, targ_tokenizer.word_index['<start>'], targ_tokenizer.word_index['<end>'], attention_type='luong', max_length_inp=max_length_inp, max_length_targ=max_length_targ, embedding_matrix=targ_embedding_matrix, pretraine_embeddings=pretrained, num_layers=layer, dropout=dropout) sample_x = tf.random.uniform((BATCH_SIZE, max_length_targ), dtype=tf.dtypes.float32) decoder.attention_mechanism.setup_memory(sample_output) initial_state = decoder.build_initial_state(BATCH_SIZE, sample_hidden, tf.float32) sample_decoder_outputs = decoder(sample_x, initial_state, training=True) tf.debugging.assert_shapes([(sample_decoder_outputs.rnn_output, (BATCH_SIZE, max_length_targ - 1, vocab_tar_size)) ])
def __init__(self):
    self.decoder = Decoder()
    self.unpacker = msgpack.Unpacker(object_pairs_hook=OrderedDict)
def __init__(self):
    self.decoder = Decoder()
from kaitai.photon import Photon
from decoder import Decoder
from scapy.all import sniff, raw
from py2neo import Graph, Node, Relationship
from ogm import get_item, get_character
from datetime import datetime
from utility import convert_to_ts, alert
from items import item_dict
import json

decoder = Decoder()
graph = Graph(password='******')


def preprocess_message(m):
    m = m.replace(b'true', b'True')
    m = m.replace(b'false', b'False')
    m = m.replace(b'null', b'None')
    return eval(m)


def handle_messages(messages):
    for m in messages:
        m = preprocess_message(m)
        item = get_item(m)
        char = get_character(m)
        msg = Relationship(
            char, m['AuctionType'], item,
def __init__(self, wavfile, transcription):
    self.wavfile = wavfile
    Decoder.__init__(self, wavfile, transcription)
def main(args): # Construct Solver # data tr_dataset = AudioDataset(args.train_json, args.batch_size, args.maxlen_in, args.maxlen_out) cv_dataset = AudioDataset(args.valid_json, args.batch_size, args.maxlen_in, args.maxlen_out) tr_loader = AudioDataLoader(tr_dataset, batch_size=1, num_workers=args.num_workers) cv_loader = AudioDataLoader(cv_dataset, batch_size=1, num_workers=args.num_workers) # load dictionary and generate char_list, sos_id, eos_id char_list, sos_id, eos_id = process_dict(args.dict) vocab_size = len(char_list) data = {'tr_loader': tr_loader, 'cv_loader': cv_loader} # model #import pdb #pdb.set_trace() encoder = Encoder(args.einput, args.ehidden, args.elayer, vocab_size, dropout=args.edropout, bidirectional=args.ebidirectional, rnn_type=args.etype, ctc_las=True) decoder = Decoder(vocab_size, args.dembed, sos_id, eos_id, args.dhidden, args.dlayer, bidirectional_encoder=args.ebidirectional) #lstm_model = Lstmctc.load_model(args.continue_from) model = Seq2Seq(encoder, decoder, ctc_las=True) model_dict = model.state_dict() print(model) #print(lstm_model) pretrained_dict = torch.load(args.ctc_model) #pretrained_dict = {k: v for k, v in pretrained_dict['state_dict'].items() if k in model_dict} pretrained_dict = {(k.replace('lstm', 'encoder')): v for k, v in pretrained_dict['state_dict'].items() if (k.replace('lstm', 'encoder')) in model_dict} model_dict.update(pretrained_dict) model.load_state_dict(model_dict) for k, v in model.named_parameters(): if k.startswith("encoder"): print(k) v.requires_grad = False model.cuda() # optimizer if args.optimizer == 'sgd': optimizier = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.l2) elif args.optimizer == 'adam': optimizier = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.l2) else: print("Not support optimizer") return # solver ctc = 0 solver = Solver(data, model, optimizier, ctc, args) solver.train()
def __init__(self): """Constructor""" self.generator = Generator() self.decoder = Decoder()
def test(): ''' main function to run the testing ''' encoder = Encoder(encoder_params[0], encoder_params[1]).cuda() decoder = Decoder(decoder_params[0], decoder_params[1]).cuda() net = ED(encoder, decoder) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if torch.cuda.device_count() > 1: net = nn.DataParallel(net) net.to(device) # 加载待测试模型 if os.path.exists(args.model_path): # load existing model print('==> loading existing model ' + args.model_path) model_info = torch.load(args.model_path) net.load_state_dict(model_info['state_dict']) model_dir = args.model_path.split('/')[-2] else: raise Exception("Invalid model path!") # 创建存储可视化图片的路径 if not os.path.isdir(args.vis_dir): os.makedirs(args.vis_dir) class_weights = torch.FloatTensor([1.0, 15.0]).cuda() lossfunction = nn.CrossEntropyLoss(weight=class_weights).cuda() # to track the testing loss as the model testing test_losses = [] # to track the average testing loss per epoch as the model testing avg_test_losses = [] ###################### # test the model # ###################### with torch.no_grad(): net.eval() # 将module设置为 eval mode,只影响dropout和batchNorm # tqdm 进度条 t = tqdm(testLoader, total=len(testLoader)) for i, (seq_len, scan_seq, label_seq, mask_seq, label_id) in enumerate(t): # 序列长度不固定,至少前2帧用来输入,固定预测后3帧 inputs = inputs = torch.cat((scan_seq, mask_seq.float()), dim=2).to(device)[:, :-3, ...] # B,S,C,H,W label = mask_seq.to(device)[:, (seq_len - 3):, ...] # B,S,C,H,W pred = net(inputs) SaveVis(model_dir, i, scan_seq.to(device), mask_seq.to(device), pred) seq_number, batch_size, input_channel, height, width = pred.size() pred = pred.reshape(-1, input_channel, height, width) # reshape to B*S,C,H,W seq_number, batch_size, input_channel, height, width = label.size() label = label.reshape(-1, height, width) # reshape to B*S,H,W label = label.to(device=device, dtype=torch.long) loss = lossfunction(pred, label) loss_aver = loss.item() / (label.shape[0]) # record test loss test_losses.append(loss_aver) t.set_postfix({ 'test_loss': '{:.6f}'.format(loss_aver), 'cnt': '{:02d}'.format(i) }) # 参数中限制了要测试的样本数量 if i >= args.sample and args.sample > 0: break torch.cuda.empty_cache() # print test statistics # calculate average loss over an epoch test_loss = np.average(test_losses) avg_test_losses.append(test_loss) # epoch_len = len(str(args.epochs)) test_losses = []
if hyperP['load_pretrain_code_embed']:
    model.decoder.embed[0].load_state_dict(torch.load('./pretrain_code_lm/embedding-1556211835.t7'))
if hyperP['freeze_embed']:
    for param in model.decoder.embed[0].parameters():
        param.requires_grad = False

model_type = ""
if args.atten:
    model_type = "atten"
else:
    model_type = "simple"

model_path = "./models/models_%s_%s" % (args.data_mode, model_type)
model.load(os.path.join(model_path, "model_100.t7"))
# model.load('model_100.t7')
beam_decoder = Decoder(model, model_type=model_type)

if is_cuda:
    model.to(device)
    # beam_decoder.to(device)
model.eval()
# input('check gpu location')

sos = special_symbols['code_sos']
eos = special_symbols['code_eos']
unk = special_symbols['code_unk']

idx2code = code_intent_pair.idx2code
intent2idx = code_intent_pair.intent2idx

dummy_code_list = []
true_code_list = []
class PointerNetwork(object): def __init__(self, max_len, batch_size, num_units=32, input_size=1): self.encoder = Encoder(num_units=num_units) self.decoder = Decoder(num_units=num_units) self.encoder_inputs = [] self.decoder_inputs = [] self.decoder_targets = [] self.target_weights = [] self.input_size = input_size self.batch_size = batch_size for i in range(max_len): self.encoder_inputs.append( tf.placeholder(tf.float32, [batch_size, input_size], name="EncoderInput%d" % i)) for i in range(max_len + 1): self.decoder_inputs.append( tf.placeholder(tf.float32, [batch_size, input_size], name="DecoderInput%d" % i)) self.decoder_targets.append( tf.placeholder(tf.int32, [batch_size, 1], name="DecoderTarget%d" % i)) self.target_weights.append( tf.placeholder(tf.float32, [batch_size, 1], name="TargetWeight%d" % i)) def create_feed_dict(self, encoder_input_data, decoder_input_data, decoder_target_data): feed_dict = {} for placeholder, data in zip(self.encoder_inputs, encoder_input_data): feed_dict[placeholder] = data for placeholder, data in zip(self.decoder_inputs, decoder_input_data): feed_dict[placeholder] = data for placeholder, data in zip(self.decoder_targets, decoder_target_data): feed_dict[placeholder] = data for placeholder in self.target_weights: feed_dict[placeholder] = np.ones([self.batch_size, 1]) return feed_dict def build(self, feed_prev=False): encoder_outputs, final_state = self.encoder.encode(self.encoder_inputs) encoder_inputs = [tf.zeros([self.batch_size, 1])] + self.encoder_inputs decoder_inputs = self.decoder_inputs if not feed_prev else [ self.decoder_inputs[0] ] * len(self.decoder_inputs) outputs, states, inps = self.decoder.decode(decoder_inputs, final_state, encoder_outputs, encoder_inputs, feed_prev) outputs = [tf.expand_dims(e, 1) for e in outputs] outputs = tf.concat(outputs, 1) targets = tf.concat(self.decoder_targets, 1) weights = tf.concat(self.target_weights, 1) print(outputs, targets, weights) loss = melt.seq2seq.sequence_loss_by_example(outputs, targets, weights) loss = tf.reduce_mean(loss) predicts = tf.to_int32(tf.argmax(outputs, 2)) correct_predict_ratio = tf.reduce_mean( tf.to_float(melt.sequence_equal(predicts, targets))) return loss, correct_predict_ratio, predicts, targets
def calc_letter_distribution(self, **kwargs): data_list = kwargs['data_list'] task_list = kwargs['task_list'] assert (len(data_list) == len(task_list)) QWERTY = ['QWERTYUIOP', 'ASDFGHJKL', 'ZXCVBNM'] self.letter_positions = [[-1, -1] for i in range(26)] self.letter_fingers = np.zeros((26, 10)) self.letter_distributions = [ [[-1, -1, 0.1, 0.1, 0.1, 0] for finger in range(10)] for alpha in range(26) ] # Formal = [xc, yc, std_x2, std_y2, std_xy, p] for r in range(3): line = QWERTY[r] for c in range(len(line)): ch = line[c] alpha = ord(ch) - ord('A') self.letter_positions[alpha] = [c, r] features = [[[] for finger in range(10)] for alpha in range(26)] for data, task in zip(data_list, task_list): assert (len(data) == len(task)) for i in range(len(task)): letter = task[i] if letter.isalpha(): alpha = ord(letter) - ord('a') feature = Decoder.get_feature(data[i]) finger = Decoder.get_finger(data[i]) # finger = 0 # If not using fingering model features[alpha][finger].append(feature) for alpha in range(26): for finger in range(10): points = np.array(features[alpha][finger]) if len(points) >= 1: self.letter_fingers[alpha][finger] += len(points) X = points[:, 0] Y = points[:, 1] if len(points) >= 5: # Remove > 3_std n_std = 3 xc, x_std = np.mean(X), np.std(X) yc, y_std = np.mean(Y), np.std(Y) pack = zip(X.copy(), Y.copy()) X = [] Y = [] for x, y in pack: if abs(x - xc) <= n_std * x_std and abs( y - yc) <= n_std * y_std: X.append(x) Y.append(y) xc = np.mean(X) yc = np.mean(Y) #plt.scatter(X, Y, color=('C'+str(alpha)), s = 5) #plt.scatter(xc, yc, color='red', s = 10) cov = np.array([[0.1, 0], [0, 0.1]]) if len(points) >= 5: cov = np.cov(np.array([X, Y])) std_x2 = cov[0, 0] std_y2 = cov[1, 1] std_xy = (std_x2**0.5) * (std_y2**0.5) p = cov[0, 1] / std_xy assert (not (np.isnan(std_x2) or np.isnan(std_y2) or np.isnan(std_xy))) self.letter_distributions[alpha][finger] = [ xc, yc, std_x2, std_y2, std_xy, p ] if sum(self.letter_fingers[alpha]) != 0: self.letter_fingers[alpha] /= sum(self.letter_fingers[alpha]) std_fingering = np.argmax(self.letter_fingers[alpha]) for finger in range(10): if self.letter_fingers[alpha][finger] == 0: self.letter_distributions[alpha][ finger] = self.letter_distributions[alpha][ std_fingering].copy() self.letter_fingers[alpha][finger] = max( self.letter_fingers[alpha][finger], 0.001) #plt.show() pickle.dump([ self.letter_positions, self.letter_fingers, self.letter_distributions ], open('models/touch.model', 'wb')) self.decoder = Decoder()
# Replace train_idxs with this one to order the batches by sentence length ordered_batch_idxs = np.argsort(np.array([np.count_nonzero(s) for s in c.train_src_idxs]) * -1) xv = c.train_src_idxs.view([('w%d' % i, 'i4') for i in range(32)]) similar_batch_idxs = np.argsort(xv, axis=0, order=[('w%d' % i) for i in range(32)]).flatten() # Process validation data if conf.do_validate: no_val_improvement = 0 early_stopped = False y_val_strings = [c.trg_idx_to_sent(s) for s in c.valid_trg_idxs] X_val, y_val = next(cstm_model.batch_iterator(c.valid_src_idxs, c.valid_trg_idxs, c.valid_src_idxs.shape[0], len(c.trg_vocab))) logging.info("Will validate on (%s) %d sentences." % (conf.valid_prefix, c.valid_src_idxs.shape[0])) logging.info("Training will stop after %d validations without improvement." % conf.max_patience) scorer = MultiBleuScorer() decoder = Decoder(c.trg_vocab["</s>"], c.trg_vocab["<unk>"], conf.beam_size, generate_unk=False) best_val_bleu = BLEUScore() # Validation prediction placeholder y_pred_val = np.ndarray((X_val.shape[0], c.train_src_idxs.shape[1], len(c.trg_vocab))).astype(np.float32) # Process test data if conf.do_test: y_test_strings = [c.trg_idx_to_sent(s) for s in c.test_trg_idxs] X_test, y_test = next(cstm_model.batch_iterator(c.test_src_idxs, c.test_trg_idxs, c.test_src_idxs.shape[0], len(c.trg_vocab))) y_pred_test = np.ndarray((X_test.shape[0], c.train_src_idxs.shape[1], len(c.trg_vocab))).astype(np.float32) logging.info("Will test on (%s) %d sentences." % (conf.test_prefix, c.test_src_idxs.shape[0])) # Create the model logging.info("Creating model...")
def __init__(self, encoder_weights_path):
    self.encoder = Encoder(encoder_weights_path)
    self.decoder = Decoder()
    self.SAModule = SAMod(512)
def translate(sentence): sentence = pre_process(sentence) decoder = Decoder() sentence = decoder.decode(sentence) sentence = post_process(sentence) return sentence
def test_mixed_chars(self): decoder = Decoder()
class Citrocan(App): dec = None update = False stop_ev = threading.Event() d_time = StringProperty() d_date = StringProperty() d_temp = StringProperty() d_vol = StringProperty() d_band = StringProperty() d_name = StringProperty() d_info = StringProperty() d_title = StringProperty() d_memch = StringProperty() d_dx = StringProperty() d_rds = StringProperty() d_rds_ok = BooleanProperty() d_ta = StringProperty() d_ta_ok = BooleanProperty() d_pty = StringProperty() d_pty_ok = BooleanProperty() d_ptyname = StringProperty() d_rdtxt_rnd = StringProperty() d_reg = StringProperty() d_loud = StringProperty() d_icon = StringProperty("icon") d_volbar = NumericProperty() d_alert = StringProperty() d_debug = StringProperty() def build(self): Window.size = (1024, 520) self.dec = Decoder(self.prop_set) Clock.schedule_interval(self.update_time, .5) Clock.schedule_interval(self.visualize, .4) thr = threading.Thread(target=self.get_candata) thr.setDaemon(True) thr.start() def update_time(self, *_): self.d_time = time.strftime("%H %M" if ':' in self.d_time else "%H:%M") self.d_date = time.strftime("%a %d/%m/%Y") def visualize(self, *_): if self.dec and self.update: self.update = False self.dec.visualize() def prop_set(self, var, val): if self.__getattribute__("d_" + var) != val: self.__setattr__("d_" + var, val) def file_receiver(self, on_recv, fname): old_tm = .0 sp = open(fname, "r") for ln in sp: if self.stop_ev.is_set(): break buf = ln.strip() # print("got:", buf) if len(buf): tm, _, b = buf.partition(' ') if old_tm: time.sleep(float(tm) - old_tm) old_tm = float(tm) if b[0] in ('R', 'S'): on_recv(b) sp.close() print("EOF, stop playing.") def serial_receiver(self, on_recv): sp = None while not self.stop_ev.is_set(): if not sp: buf = [] ready = False try: sp = serial.Serial(port=Port, baudrate=460800, timeout=1) except (ValueError, serial.SerialException) as e: print("can't open serial:", e) if self.dec.connected: self.dec.connected = False self.update = True if sp and not ready: try: sp.write("i0\r\n".encode()) except serial.SerialTimeoutException as e: print("can't write to serial:", e) time.sleep(1) if sp: while not self.stop_ev.is_set(): try: r = sp.read(1) except serial.SerialException: sp.close() sp = None r = None if not r: break if r == b'\n': # print("got:", ''.join(buf)) if len(buf): if buf[0] in ('R', 'S'): on_recv(''.join(buf)) elif buf[0] == 'I': ready = True buf = [] elif r >= b' ': buf.append(r.decode()) else: time.sleep(1) if sp: sp.close() def bt_receiver(self, on_recv): BluetoothAdapter = autoclass('android.bluetooth.BluetoothAdapter') UUID = autoclass('java.util.UUID') sock = None while not self.stop_ev.is_set(): if not sock: buf = [] send = None recv = None ready = False if self.dec.connected: self.dec.connected = False self.update = True paired = BluetoothAdapter.getDefaultAdapter().getBondedDevices().toArray() for dev in paired: if dev.getName() == BtName: sock = dev.createRfcommSocketToServiceRecord(UUID.fromString("00001101-0000-1000-8000-00805F9B34FB")) recv = sock.getInputStream() send = sock.getOutputStream() print("wait for connection") try: sock.connect() except Exception as e: sock.close() sock = None print("can't connect bluetooth:", e) break if sock and not ready: print("sending init") send.write("i0\r\n") send.flush() if sock: while not self.stop_ev.is_set(): try: r = recv.read() except Exception as e: print("can't read from bluetooth:", e) sock.close() sock = None r = None if not r: break if r == 13: # print("got:", ''.join(buf)) if len(buf): if buf[0] in ('R', 'S'): 
on_recv(''.join(buf)) elif buf[0] == 'I': ready = True buf = [] elif r >= 32: buf.append(chr(r)) else: time.sleep(1) if sock: sock.close() def get_candata(self): self.dec.connected = False self.update = True def on_recv(buf): # print("recv:", buf) try: flds = buf.split() cid = int(flds[1], 16) clen = int(flds[2]) cflds = [] for n in range(clen): cflds.append(int(flds[n + 3], 16)) if self.dec and self.dec.decode(cid, clen, cflds): self.dec.connected = True self.update = True except (TypeError, ValueError, IndexError) as e: print("can't decode:", buf, e) if FromFile: self.file_receiver(on_recv, FromFile) elif autoclass: self.bt_receiver(on_recv) else: self.serial_receiver(on_recv) def on_pause(self): return True def on_resume(self): pass def on_stop(self): self.stop_ev.set()
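# Sketch of the frame format that get_candata() expects from its serial, Bluetooth
# or file sources: a marker ('R' or 'S'), the CAN identifier in hex, the byte
# count, then the data bytes in hex. The sample line below is invented for
# illustration only.
def parse_can_line(buf):
    flds = buf.split()
    cid = int(flds[1], 16)                       # CAN identifier
    clen = int(flds[2])                          # number of data bytes
    cflds = [int(flds[n + 3], 16) for n in range(clen)]
    return cid, clen, cflds

print(parse_can_line("R 165 8 00 01 02 03 04 05 06 07"))
# -> (357, 8, [0, 1, 2, 3, 4, 5, 6, 7])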
def test_mixed_case(self): decoder = Decoder()
def __init__(self, decoder=None): if decoder is None: self.decoder = Decoder() else: self.decoder = decoder
def test_all_lower(self): decoder = Decoder()
class AttentiveNP(): """ The Attentive Neural Process model. """ def __init__(self, x_size, y_size, r_size, det_encoder_hidden_size, det_encoder_n_hidden, lat_encoder_hidden_size, lat_encoder_n_hidden, decoder_hidden_size, decoder_n_hidden, attention_type): """ :param x_size: An integer describing the dimensionality of the input x :param y_size: An integer describing the dimensionality of the target variable y :param r_size: An integer describing the dimensionality of the embedding / context vector r :param det_encoder_hidden_size: An integer describing the number of nodes per hidden layer in the deterministic encoder NN :param det_encoder_n_hidden: An integer describing the number of hidden layers in the deterministic encoder neural network :param lat_encoder_hidden_size: An integer describing the number of nodes per hidden layer in the latent encoder neural NN :param lat_encoder_n_hidden: An integer describing the number of hidden layers in the latent encoder neural network :param decoder_hidden_size: An integer describing the number of nodes per hidden layer in the decoder neural network :param decoder_n_hidden: An integer describing the number of hidden layers in the decoder neural network :param attention_type: The type of attention to be used. A string, either "multihead", "laplace", "uniform", "dot_product" """ self.x_size = x_size self.y_size = y_size self.r_size = r_size self.det_encoder = DeterministicEncoder(x_size, y_size, r_size, det_encoder_n_hidden, det_encoder_hidden_size, self_att=True, cross_att=True, attention_type="multihead") self.lat_encoder = LatentEncoder((x_size + y_size), r_size, lat_encoder_n_hidden, lat_encoder_hidden_size) self.decoder = Decoder((x_size + r_size + r_size), y_size, decoder_n_hidden, decoder_hidden_size) self.optimiser = optim.Adam( list(self.det_encoder.parameters()) + list(self.lat_encoder.parameters()) + list(self.decoder.parameters())) def train(self, x_train, y_train, x_test, y_test, x_scaler, y_scaler, batch_size, lr, iterations, testing, plotting): """ :param x_train: A tensor with dimensions [N_train, x_size] containing the training data (x values) :param y_train: A tensor with dimensions [N_train, y_size] containing the training data (y values) :param x_test: A tensor with dimensions [N_test, x_size] containing the test data (x values) :param y_test: A tensor with dimensions [N_test, y_size] containing the test data (y values) :param x_scaler: The standard scaler used when testing == True to convert the x values back to the correct scale. :param y_scaler: The standard scaler used when testing == True to convert the predicted y values back to the correct scale. :param batch_size: An integer describing the number of times we should sample the set of context points used to form the aggregated embedding during training, given the number of context points to be sampled N_context. When testing this is set to 1 :param lr: A float number, describing the optimiser's learning rate :param iterations: An integer, describing the number of iterations. In this case it also corresponds to the number of times we sample the number of context points N_context :param testing: A Boolean object; if set to be True, then every 30 iterations the R^2 score and RMSE values will be calculated and printed for both the train and test data :return: """ self.gp_sampler = GPSampler(data=(x_train, y_train)) self.batch_size = batch_size self._max_num_context = x_train.shape[0] self.iterations = iterations #Convert the data for use in PyTorch. 
x_train = torch.from_numpy(x_train).float() y_train = torch.from_numpy(y_train).float() x_test = torch.from_numpy(x_test).float() y_test = torch.from_numpy(y_test).float() # At prediction time the context points comprise the entire training set. x_tot_context = torch.unsqueeze(x_train, dim=0) y_tot_context = torch.unsqueeze(y_train, dim=0) for iteration in range(iterations): self.optimiser.zero_grad() # Randomly select the number of context points N_context (uniformly from 3 to # N_train) num_context = np.random.randint(low=1, high=self._max_num_context) # Randomly select N_context context points from the training data, a total of # batch_size times. x_context, y_context, x_target, y_target = self.gp_sampler.sample( batch_size=self.batch_size, train_size=50, num_context=num_context, x_min=-4, x_max=4) x_context = torch.from_numpy(x_context).float() y_context = torch.from_numpy(y_context).float() x_target = torch.from_numpy(x_target).float() y_target = torch.from_numpy(y_target).float() # The input to both the deterministic and latent encoder is (x, y)_i for all data points in the set of context # points. input_context = torch.cat((x_context, y_context), dim=2) input_target = torch.cat((x_target, y_target), dim=2) #The deterministic encoder outputs the deterministic embedding r. r = self.det_encoder.forward( x_context, y_context, x_target) #[batch_size, N_target, r_size] # The latent encoder outputs a prior distribution over the latent embedding z (conditioned only on the context points). z_priors, mu_prior, sigma_prior = self.lat_encoder.forward( x_context, y_context) if y_target is not None: z_posteriors, mu_posterior, sigma_posterior = self.lat_encoder.forward( x_target, y_target) zs = [dist.sample() for dist in z_posteriors] #[batch_size, r_size] else: zs = [dist.sample() for dist in z_priors] #[batch_size, r_size] z = torch.cat(zs) z = z.view(-1, self.r_size) # The input to the decoder is the concatenation of the target x values, the deterministic embedding r and the latent variable z # the output is the predicted target y for each value of x. dists_y, _, _ = self.decoder.forward(x_target.float(), r.float(), z.float()) # Calculate the loss log_ps = [ dist.log_prob(y_target[i, ...].float()) for i, dist in enumerate(dists_y) ] log_ps = torch.cat(log_ps) kl_div = [ kl_divergence(z_posterior, z_prior).float() for z_posterior, z_prior in zip(z_posteriors, z_priors) ] kl_div = torch.tensor(kl_div) loss = -(torch.mean(log_ps) - torch.mean(kl_div)) self.losslogger = loss # The loss should generally decrease with number of iterations, though it is not # guaranteed to decrease monotonically because at each iteration the set of # context points changes randomly. if iteration % 200 == 0: print("Iteration " + str(iteration) + ":, Loss = {:.3f}".format(loss.item())) # We can set testing = True if we want to check that we are not overfitting. if testing: r2_train_list = [] rmse_train_list = [] nlpd_train_list = [] r2_test_list = [] rmse_test_list = [] nlpd_test_list = [] #Useful for determining uncertainty due to sampling z. 
for j in range(10): _, predict_train_mean, predict_train_var = self.predict( x_tot_context, y_tot_context, x_tot_context) predict_train_mean = np.squeeze( predict_train_mean.data.numpy(), axis=0) predict_train_var = np.squeeze( predict_train_var.data.numpy(), axis=0) # We transform the standardised predicted and actual y values back to the original data # space y_train_mean_pred = y_scaler.inverse_transform( predict_train_mean) y_train_var_pred = y_scaler.var_ * predict_train_var y_train_untransformed = y_scaler.inverse_transform( y_train) r2_train = r2_score(y_train_untransformed, y_train_mean_pred) nlpd_train = nlpd(y_train_mean_pred, y_train_var_pred, y_train_untransformed) rmse_train = np.sqrt( mean_squared_error(y_train_untransformed, y_train_mean_pred)) r2_train_list.append(r2_train) rmse_train_list.append(rmse_train) nlpd_train_list.append(nlpd_train) x_test = torch.unsqueeze(x_test, dim=0) _, predict_test_mean, predict_test_var = self.predict( x_tot_context, y_tot_context, x_test) x_test = torch.squeeze(x_test, dim=0) predict_test_mean = np.squeeze( predict_test_mean.data.numpy(), axis=0) predict_test_var = np.squeeze( predict_test_var.data.numpy(), axis=0) # We transform the standardised predicted and actual y values back to the original data # space y_test_mean_pred = y_scaler.inverse_transform( predict_test_mean) y_test_var_pred = y_scaler.var_ * predict_test_var y_test_untransformed = y_scaler.inverse_transform( y_test) indices = np.random.permutation( y_test_untransformed.shape[0])[0:20] r2_test = r2_score(y_test_untransformed[indices, 0], y_test_mean_pred[indices, 0]) rmse_test = np.sqrt( mean_squared_error( y_test_untransformed[indices, 0], y_test_mean_pred[indices, 0])) nlpd_test = nlpd(y_test_mean_pred[indices, 0], y_test_var_pred[indices, 0], y_test_untransformed[indices, 0]) r2_test_list.append(r2_test) rmse_test_list.append(rmse_test) nlpd_test_list.append(nlpd_test) r2_train_list = np.array(r2_train_list) rmse_train_list = np.array(rmse_train_list) nlpd_train_list = np.array(nlpd_train_list) r2_test_list = np.array(r2_test_list) rmse_test_list = np.array(rmse_test_list) nlpd_test_list = np.array(nlpd_test_list) print("\nR^2 score (train): {:.3f} +- {:.3f}".format( np.mean(r2_train_list), np.std(r2_train_list) / np.sqrt(len(r2_train_list)))) #print("RMSE (train): {:.3f} +- {:.3f}".format(np.mean(rmse_train_list) / np.sqrt( #len(rmse_train_list)))) print("NLPD (train): {:.3f} +- {:.3f}".format( np.mean(nlpd_train_list), np.std(nlpd_train_list) / np.sqrt(len(nlpd_train_list)))) print("R^2 score (test): {:.3f} +- {:.3f}".format( np.mean(r2_test_list), np.std(r2_test_list) / np.sqrt(len(r2_test_list)))) #print("RMSE (test): {:.3f} +- {:.3f}".format(np.mean(rmse_test_list), #np.std(rmse_test_list) / np.sqrt(len(rmse_test_list)))) print("NLPD (test): {:.3f} +- {:.3f}\n".format( np.mean(nlpd_test_list), np.std(nlpd_test_list) / np.sqrt(len(nlpd_test_list)))) if iteration % 1000 == 0: if plotting: x_c = x_scaler.inverse_transform(np.array(x_train)) y_c = y_train_untransformed x_t = x_scaler.inverse_transform(np.array(x_test)) y_t = x_t**3 plt.figure(figsize=(7, 7)) plt.scatter(x_c, y_c, color='red', s=15, marker='o', label="Context points") plt.plot(x_t, y_t, linewidth=1, color='red', label="Ground truth") plt.plot(x_t, y_test_mean_pred, color='darkcyan', linewidth=1, label='Mean prediction') plt.plot(x_t[:, 0], y_test_mean_pred[:, 0] - 1.96 * np.sqrt(y_test_var_pred[:, 0]), linestyle='-.', marker=None, color='darkcyan', linewidth=0.5) plt.plot(x_t[:, 0], y_test_mean_pred[:, 0] 
+ 1.96 * np.sqrt(y_test_var_pred[:, 0]), linestyle='-.', marker=None, color='darkcyan', linewidth=0.5, label='Two standard deviations') plt.fill_between( x_t[:, 0], y_test_mean_pred[:, 0] - 1.96 * np.sqrt(y_test_var_pred[:, 0]), y_test_mean_pred[:, 0] + 1.96 * np.sqrt(y_test_var_pred[:, 0]), color='cyan', alpha=0.2) plt.title('Predictive distribution') plt.ylabel('f(x)') plt.yticks([-80, -60, -40, -20, 0, 20, 40, 60, 80]) plt.ylim(-80, 80) plt.xlim(-4, 4) plt.xlabel('x') plt.xticks([-4, -2, 0, 2, 4]) plt.legend() plt.savefig('results/anp_1dreg_crossatt_2selfatt' + str(iteration) + '.png') loss.backward() self.optimiser.step() def predict(self, x_context, y_context, x_target): """ :param x_context: A tensor of dimensions [batch_size, N_context, x_size]. When training N_context is randomly sampled between 3 and N_train; when testing N_context = N_train :param y_context: A tensor of dimensions [batch_size, N_context, y_size] :param x_target: A tensor of dimensions [N_target, x_size] :return dist: The distributions over the predicted outputs y_target :return mu: A tensor of dimensionality [batch_size, N_target, output_size] describing the means of the normal distribution. :return var: A tensor of dimensionality [batch_size, N_target, output_size] describing the variances of the normal distribution. """ r = self.det_encoder.forward(x_context, y_context, x_target) # The latent encoder outputs a distribution over the latent embedding z. dists_z, _, _ = self.lat_encoder.forward(x_context, y_context) zs = [dist.sample() for dist in dists_z] # [batch_size, r_size] z = torch.cat(zs) z = z.view(-1, self.r_size) # The input to the decoder is the concatenation of the target x values, the deterministic embedding r and the latent variable z # the output is the predicted target y for each value of x. dists_y, _, _ = self.decoder.forward(x_target.float(), r.float(), z.float()) # The input to the decoder is the concatenation of the target x values, the deterministic embedding r and the latent variable z # the output is the predicted target y for each value of x. dist, mu, sigma = self.decoder.forward(x_target.float(), r.float(), z.float()) return dist, mu, sigma
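# Minimal sketch of the training objective used above, with hypothetical shapes:
# maximise the decoder's log-likelihood of the targets while penalising the KL
# divergence between the latent posterior (context + targets) and the prior
# (context only), i.e. loss = -(E[log p(y | x, r, z)] - KL(q || p)).
import torch
from torch.distributions import Normal, kl_divergence

def anp_loss(pred_mu, pred_sigma, y_target, mu_post, sigma_post, mu_prior, sigma_prior):
    log_p = Normal(pred_mu, pred_sigma).log_prob(y_target).mean()
    kl = kl_divergence(Normal(mu_post, sigma_post), Normal(mu_prior, sigma_prior)).mean()
    return -(log_p - kl)

# Example with batch_size=4, N_target=10, y_size=1, r_size=8:
# anp_loss(torch.zeros(4, 10, 1), torch.ones(4, 10, 1), torch.zeros(4, 10, 1),
#          torch.zeros(4, 8), torch.ones(4, 8), torch.zeros(4, 8), torch.ones(4, 8))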
microsecond = arg[19:] microsecond += (6-len(microsecond))*'0' microsecond = int(microsecond) new_pulses.append(datetime(year,month,day,hour,minute,second,microsecond)) last = None for p in new_pulses: if last is not None: if show_deltas: print p - last last = p decoder = Decoder() frame_decoder = FrameDecoder() new_symbols = decoder.decode(new_pulses,debug=False) #for ns in new_symbols: # print ns new_data = frame_decoder.decode(new_symbols) for d in new_data: print d.name,"=",d.value,"@",d.timeStamp
def test(): ''' main function to run the test ''' testFolder = MovingMNIST(is_train=False, root='../data/npy-064/', mode ='test', n_frames_input=args.frames_input, n_frames_output=args.frames_output, num_objects=[3]) testLoader = torch.utils.data.DataLoader(testFolder, batch_size=args.batch_size, shuffle=False) if args.convlstm: encoder_params = convlstm_encoder_params decoder_params = convlstm_decoder_params elif args.convgru: encoder_params = convgru_encoder_params decoder_params = convgru_decoder_params else: encoder_params = convgru_encoder_params decoder_params = convgru_decoder_params #TIMESTAMP = args.timestamp # restore args CHECKPOINT = args.checkpoint TIMESTAMP = args.timestamp save_dir = './save_model/' + TIMESTAMP args_path = os.path.join(save_dir, 'cmd_args.txt') if os.path.exists(args_path): with open(args_path, 'r') as f: args.__dict__ = json.load(f) args.is_train = False encoder = Encoder(encoder_params[0], encoder_params[1]).cuda() decoder = Decoder(decoder_params[0], decoder_params[1], args.frames_output).cuda() net = ED(encoder, decoder) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if torch.cuda.device_count() > 1: net = nn.DataParallel(net) net.to(device) if os.path.exists(save_dir): # load existing model print('==> loading existing model') model_info = torch.load(CHECKPOINT) net.load_state_dict(model_info['state_dict']) optimizer = torch.optim.Adam(net.parameters()) optimizer.load_state_dict(model_info['optimizer']) else: print('there is no such checkpoint in', save_dir) exit() lossfunction = nn.MSELoss().cuda() # to track the test loss as the model runs test_losses = [] # to track the average test loss per epoch avg_test_losses = [] # mini_val_loss = np.inf preds = [] ###################### # test the model # ###################### with torch.no_grad(): net.eval() t = tqdm(testLoader, leave=False, total=len(testLoader)) for i, (idx, targetVar, inputVar, _, _) in enumerate(t): if i == 3000: break inputs = inputVar.to(device) #label = targetVar.to(device) pred = net(inputs) #loss = lossfunction(pred, label) preds.append(pred) #loss_aver = loss.item() / args.batch_size # record test loss #test_losses.append(loss_aver) torch.cuda.empty_cache() # print test statistics # calculate average loss over an epoch #test_loss = np.average(test_losses) #avg_test_losses.append(test_loss) #print_msg = (f'test_loss: {test_loss:.6f}') #print(print_msg) import pickle with open("preds.pkl", "wb") as fp: pickle.dump(preds, fp)
def test_all_caps(self): decoder = Decoder()
class Keyboard: GRID = 50 TASK_NUM = 20 CORPUS_NUM = 20000 VISABLE_NO = 0 VISABLE_TOUCH = 1 VISABLE_ALWAYS = 2 CORRECT_NO = 0 CORRECT_WORD = 1 CORRECT_LETTER = 2 def __init__(self, VISABLE_FEEDBACK=VISABLE_ALWAYS, WORD_CORRECTION=CORRECT_WORD): self.VISABLE_FEEDBACK = VISABLE_FEEDBACK self.WORD_CORRECTION = WORD_CORRECTION self.init_letter_info() self.init_task_list('phrases.txt') self.init_decoder() self.init_inputted_data() self.init_display() self.init_sound() def init_letter_info(self): FINGERS = ['QAZ|P', 'WSX|OL', 'EDC|IK', 'RFV|TGB', 'YHN|UJM'] COLORS = [(0, 64, 0), (64, 0, 64), (64, 64, 0), (0, 64, 64), (0, 0, 64)] self.letter_colors = [] for alpha in range(26): ch = chr(alpha + ord('A')) for (finger, color) in zip(FINGERS, COLORS): if ch in finger: self.letter_colors.append(color) break def init_task_list(self, path): self.task_list = [] self.curr_task_id = 0 lines = open(path).readlines() for line in lines: line = line.lower() self.task_list.append(line.strip('\n')) random.shuffle(self.task_list) self.task_list = self.task_list[:self.TASK_NUM] self.task = self.task_list[self.curr_task_id] def init_decoder(self): self.decoder = Decoder() def init_inputted_data(self): self.redo_phrase() def init_display(self): self.screen = pygame.display.set_mode( (10 * self.GRID + 1, 4 * self.GRID + 1)) pygame.display.set_caption('Qwerty Watch') self.L_row = None # Hightline line self.L_col = None self.R_row = None self.R_col = None def init_sound(self): self.sound_do = pygame.mixer.Sound("sound/do.wav") self.sound_do.set_volume(0.2) self.sound_re = pygame.mixer.Sound("sound/re.wav") self.sound_re.set_volume(0.2) self.sound_mi = pygame.mixer.Sound("sound/mi.wav") self.sound_mi.set_volume(0.2) self.sound_type = pygame.mixer.Sound("sound/type.wav") self.sound_type.set_volume(1.0) def draw(self): GRID = self.GRID image = np.zeros((4 * GRID + 1, 10 * GRID + 1, 3), np.uint8) cv2.rectangle(image, (0, 0), (10 * GRID, GRID - 1), (0, 0, 0), -1) # Draw task and inputted text cv2.putText(image, self.task, (int(GRID * 0.5), int(GRID * 0.4)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1) cv2.putText(image, self.inputted_text + '_', (int(GRID * 0.5), int(GRID * 0.8)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1) # Draw the keyboard layout for i in range(26): ch = chr(i + ord('A')) pos = self.decoder.positions[i] bg_color = self.letter_colors[i] cv2.rectangle(image, (int(pos[0] * GRID), int( (pos[1] + 1) * GRID)), (int( (pos[0] + 1) * GRID), int((pos[1] + 2) * GRID)), bg_color, -1) cv2.rectangle(image, (int(pos[0] * GRID), int( (pos[1] + 1) * GRID)), (int( (pos[0] + 1) * GRID), int((pos[1] + 2) * GRID)), (255, 255, 255), 1) cv2.putText(image, ch, (int(pos[0] * GRID) + 15, int( (pos[1] + 2) * GRID) - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) # Visable feedback if self.VISABLE_FEEDBACK == self.VISABLE_ALWAYS: if self.L_row != None: row = max(0.5, min(3.5, self.L_row)) row_pixel = int((row - 0.5 + 1) * GRID) image[row_pixel - 2:row_pixel + 3, :5 * GRID] *= 2 if self.R_row != None: row = max(0.5, min(3.5, self.R_row)) row_pixel = int((row - 0.5 + 1) * GRID) image[row_pixel - 2:row_pixel + 3, 5 * GRID:] *= 2 if self.L_col != None: col = max(0.5, min(2.5, self.L_col)) col_pixel = int((2.5 + col) * GRID) image[1 * GRID:4 * GRID, col_pixel - 2:col_pixel + 3] *= 2 if self.R_col != None: col = max(0.5, min(2.5, self.R_col)) col_pixel = int((7.5 - col) * GRID) image[1 * GRID:4 * GRID, col_pixel - 2:col_pixel + 3] *= 2 elif self.VISABLE_FEEDBACK == self.VISABLE_TOUCH: DURATION = 0.5 if time.clock() - 
self.last_touch_time < DURATION and len( self.inputted_data) > 0: [col, row] = self.get_position(self.inputted_data[-1]) row_pixel = int((row - 0.5 + 2) * GRID) col_pixel = int((col - 0.5 + 1) * GRID) schedule = (time.clock() - self.last_touch_time) / DURATION image[row_pixel - 5:row_pixel + 6, col_pixel - 5:col_pixel + 6] = cv2.add( image[row_pixel - 5:row_pixel + 6, col_pixel - 5:col_pixel + 6], int(255 * (1 - schedule))) elif self.VISABLE_FEEDBACK == self.VISABLE_NO: pass pg_img = pygame.surfarray.make_surface(cv2.transpose(image)) self.screen.blit(pg_img, (0, 0)) pygame.display.flip() def next_phrase(self): self.curr_task_id += 1 print('Phase = %d' % (self.curr_task_id)) self.redo_phrase() if self.curr_task_id >= len(self.task_list): self.curr_task_id = 0 return False self.task = self.task_list[self.curr_task_id] return True def redo_phrase(self): self.inputted_space_cnt = 0 self.inputted_text = '' self.inputted_data = [] self.last_touch_time = -1 def enter_a_letter(self, input_data, input_letter): self.sound_type.play() i = len(self.inputted_text) letter = '' if i < len(self.task): if self.WORD_CORRECTION == self.CORRECT_LETTER: if self.task[ i] == ' ': # can not enter space by inputting letter, when CORRECT_LETTER return '' letter = self.task[i] else: letter = input_letter self.inputted_text += letter self.inputted_data.append(input_data) self.last_touch_time = time.clock() return letter def enter_a_space(self, input_data): self.sound_type.play() i = len(self.inputted_text) if i == 0 or self.inputted_text[-1] == ' ': # can not enter two spaces return if self.WORD_CORRECTION == self.CORRECT_WORD: tags = self.inputted_text.split(' ') if len(tags) > 0 and tags[-1] != '': word = self.decoder.predict( self.inputted_data[-len(tags[-1]):], self.task[:len(self.inputted_text)]) if word != '': # '' means no match tags[-1] = word self.inputted_text = ' '.join(tags) if i < len(self.task): self.inputted_space_cnt += 1 self.inputted_text += ' ' self.inputted_data.append(input_data) def delete_a_letter(self): self.sound_type.play() if len(self.inputted_text) > 0: self.inputted_text = self.inputted_text[:-1] self.inputted_data = self.inputted_data[:-1] if self.inputted_text == '': self.inputted_space_cnt = 0 def get_position(self, data): # get position from inputted data [side, index, highlight_row, highlight_col] = data[:4] row = max(0 - 0.5, min(2 + 0.5, highlight_row - 1)) col = max(0 - 0.5, min(1 + 0.5, highlight_col - 1)) if side == 'L': if index == 1: col = 3 + col else: col = 3 - (index - 1) if side == 'R': if index == 1: col = 6 - col else: col = 6 + (index - 1) return [col, row]
def main(): import sys # prepare training data for language model and translation model sys.path.append('../../clir/') from aligned_parser import GizaReader data = GizaReader('../../data/giza/', 'alignment-en-fr') # instantiate translation model tm = TranslationModel(data) # n for n-gram language model n = 2 # instantiate language model lm = LanguageModel(n, data) # alpha for reordering model alpha = 0.75 # instantiate reordering model rm = ReorderingModel(alpha) # decoder_stack_threshold for decoder stacks decoder_stack_threshold = 5 # the number of results will be produced to output num_results = 10 translated_src_mask = None last_translated_index = None # interactive command line input print fill("Please enter the full source sentence:", 79) src_sent = to_unicode_or_bust(raw_input(), sys.stdin.encoding).split() for i, word in enumerate(src_sent): print word.encode(sys.stdout.encoding), "(%d)" % i, print print fill("Please enter the partial translation:", 79) partial_tgt_sent = to_unicode_or_bust(raw_input(), sys.stdin.encoding).split() if partial_tgt_sent != []: print fill("Which source words were translated?", 79) translated = list(eval(raw_input())) translated_src_mask = [ i in translated for i in range(len(src_sent))] print translated_src_mask print fill("Which source word is aligned with the last translated word \"%s\"?" % (partial_tgt_sent[-1]), 79) last_translated_index = eval(raw_input()) else: partial_tgt_sent = None # instantiate a decoder with all the input data collected decoder = Decoder(lm, rm, tm, src_sent, decoder_stack_threshold, translated_src_mask, last_translated_index, partial_tgt_sent ) # invoke decode() method to complete decoding decoder.decode() # DEBUG ONLY # print len(decoder._decoder_stacks[-1].decompose()) # for hypo in decoder.decoder_stacks[-1]: # print hypo.last_n_targets(len(src_sent)), hypo.partial_score # print results print print fill("Translation suggestions:", 79) decoder.print_ranked_results(num_results);
def init_decoder(self): self.decoder = Decoder()
class TextCleanser(object): def __init__(self): """Constructor""" self.generator = Generator() self.decoder = Decoder() # print "READY" def heuristic_cleanse(self, text, gen_off_by_ones=False, ssk=False): """Accept noisy text, run through cleanser described in Gouws et al. 2011, and return the cleansed text. If gen_off_by_ones==True, generate spelling variants (1 edit distance away).""" gen = self.generator if ssk: string_sim_func=gen.SSK_SIM else: string_sim_func=gen.IBM_SIM replacements, old_tokens, candidates = gen.sent_generate_candidates(text, string_sim_func, gen_off_by_ones) # print candidates # word_lattice = gen.generate_word_lattice(candidates) word_mesh = gen.generate_word_mesh(candidates) cleantext,error = self.decoder.decode(word_mesh) if error: print "mesh: ", word_mesh print cleantext print error # raw_input("[PRESS ENTER]") # exit(2) # print "clean: ", cleantext replacements = self.get_replacements(cleantext, old_tokens) return cleantext, error, replacements def phonetic_ED_cleanse(self, text, gen_off_by_ones=True): gen = self.generator replacements, old_tokens, candidates = gen.sent_generate_candidates(text, gen.PHONETIC_ED_SIM, gen_off_by_ones) #print candidates # word_lattice = gen.generate_word_lattice(candidates) word_mesh = gen.generate_word_mesh(candidates) cleantext,error = self.decoder.decode(word_mesh) replacements = self.get_replacements(cleantext, old_tokens) return cleantext, error, replacements def ssk_cleanse(self, text, gen_off_by_ones=False): "Use subsequence overlap similarity function" return self.heuristic_cleanse(text, gen_off_by_ones, ssk=True) def log_oovs(self, text): """return a list of all out-of-vocabulary words for pre-processing purposes""" raise NotImplementedError("Not yet implemented") def get_replacements(self, cleantext, old_tokens): """return the token replacements that were made""" new_tokens = self.generator.fix_bad_tokenisation(cleantext.split()) # if new_tokens contain more tokens than old_tokens then alignment is screwed if len(new_tokens)>len(old_tokens): replacements = -1 else: replacements = [] for i, new_tok in enumerate(new_tokens): if i >= len(old_tokens): break old_tok = old_tokens[i] if new_tok!=old_tok.lower(): replacements.append((old_tok, new_tok)) return replacements
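# The Generator/Decoder used by TextCleanser are not shown here. A word mesh is
# essentially a confusion network: one list of (candidate, score) pairs per token
# position. The toy decoder below just keeps the best-scoring candidate at each
# position; the real Decoder presumably also scores paths with a language model.
def greedy_mesh_decode(word_mesh):
    """word_mesh: list of positions, each a list of (candidate_word, score) pairs."""
    return ' '.join(max(pos, key=lambda cand: cand[1])[0] for pos in word_mesh)

print(greedy_mesh_decode([[('see', 0.7), ('c', 0.3)],
                          [('you', 0.8), ('u', 0.2)]]))  # -> "see you"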
def __init__(self): self.encoder = Encoder() self.decoder = Decoder()
class Disassembler(object): def __init__(self): self.decoder = Decoder(self) def __getattr__(self, name): if not name.startswith('do_'): raise AttributeError("Disassembler has no attribute '%s'" % name) return name[3:] def disassemble(self, pc, mem, limit_addr=sys.maxint, is_trace=False): try: while pc < limit_addr and pc < len(mem): name, is_byte_insn, args, size = self.decoder.decode(pc, mem) is_ret = self.is_ret(name, args) name, args = self.try_emulate_insn(name, args) full_name = name if name[0] == 'j': arg_str = '$%+x' % args[0] if not is_trace: arg_str += ' [%x]' % (args[0] + pc) else: if is_byte_insn: full_name += '.b' arg_str = (', '.join(map(self.pretty_addr, args))) yield pc, '%s\t' % full_name + arg_str pc += size if (is_ret or name == 'jmp') and not is_trace: break except: yield pc, 'Failed to disassemble.' reg_names = ['pc', 'sp', 'sr', 'cg'] @staticmethod def is_ret(name, args): return name == 'mov' and args[0] == Address(3, 1, None) and \ args[1] == Address(0, 0, None) @staticmethod def pretty_reg(n): if n < 4: return Disassembler.reg_names[n] return 'r%d' % n @staticmethod def pretty_addr(addr): if addr.loc == 2: if addr.mode == 1: return '&%04x' % addr.data elif addr.mode in [2, 3]: return '#%x' % (1 << addr.mode) elif addr.loc == 3: if addr.mode == 3: return '#-1' else: return '#%x' % addr.mode elif addr.mode == 3 and addr.loc == 0: return '#%04x' % addr.data if addr.mode == 0: return Disassembler.pretty_reg(addr.loc) elif addr.mode == 1: return '%x(%s)' % (addr.data, Disassembler.pretty_reg(addr.loc)) elif addr.mode == 2: return '@r%d' % addr.loc else: return '@%s+' % Disassembler.pretty_reg(addr.loc) @staticmethod def try_emulate_insn(name, args): if Disassembler.is_ret(name, args): return 'ret', [] elif name == 'mov' and args[1] == Address(0, 0, None): return 'br', [args[0]] return name, args
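# Stripped-down illustration of the __getattr__ trick above: any attribute access
# of the form obj.do_<mnemonic> simply returns the mnemonic string, which lets the
# Decoder (not shown) look up instruction names on its owner generically.
class NameEcho(object):
    def __getattr__(self, name):
        if not name.startswith('do_'):
            raise AttributeError(name)
        return name[3:]

print(NameEcho().do_mov)  # -> 'mov'
print(NameEcho().do_add)  # -> 'add'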
def __init__(self): self.decoder = Decoder(self)
""" Python program to realize the simple stenography which implements both coding and decoding part. :Author: Manthan C S :GitHub: mnthnx64 """ from coder import Coder from decoder import Decoder if __name__ == '__main__': cdr = Coder("In all the examples so far, the elements of a are provided by the iterator one at a time, because all the looping logic is internal to the iterator. While this is simple and convenient, it is not very efficient. A better approach is to move the one-dimensional innermost loop into your code, external to the iterator. This way, NumPy’s vectorized operations can be used on larger chunks of the elements being visited.") cdr.encode() dcdr = Decoder() text = dcdr.decode() print(text)
def test_empty_string(self): decoder = Decoder('') self.assertEqual(decoder.decode_message(3), '') self.assertEqual(decoder.decode_message(14), '') self.assertEqual(decoder.decode_message(26), '')
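# The Decoder exercised by these tests is not included; the tests only pin down
# the Decoder('<text>').decode_message(<shift>) interface and that an empty string
# decodes to ''. A plausible minimal implementation -- an assumption, not the
# original code -- is a case-preserving Caesar-shift decoder:
class ShiftDecoder(object):
    def __init__(self, message=''):
        self.message = message

    def decode_message(self, shift):
        out = []
        for ch in self.message:
            if ch.isalpha():
                base = ord('A') if ch.isupper() else ord('a')
                out.append(chr((ord(ch) - base - shift) % 26 + base))
            else:
                out.append(ch)  # digits, spaces and punctuation pass through
        return ''.join(out)

# ShiftDecoder('').decode_message(3) == ''
# ShiftDecoder('Khoor').decode_message(3) == 'Hello'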