def __init__(self, data_set: DataSet, ngram_size: int, emb_size: int, hid_size: int):
    """Initialize the language recognition module.

    Args:
        data_set: the dataset from which the set of input symbols
            and output classes (languages) can be extracted
        ngram_size: size of n-gram features (e.g., use 1 for unigrams,
            2 for bigrams, etc.)
        emb_size: size of the character embedding vectors
        hid_size: size of the hidden layer of the FFN used for scoring
    """
    # Keep the size of the ngrams
    self.ngram_size = ngram_size
    # Calculate the embedding alphabet and create the embedding sub-module
    feat_set = self.alphabet(data_set)
    self.register("emb", Embedding(feat_set, emb_size))
    # Encoding (mapping between langs and ints)
    lang_set = set(lang for (_, lang) in data_set)
    self.enc = Encoding(lang_set)
    # Scoring FFN sub-module
    self.register("ffn", FFN(idim=emb_size, hdim=hid_size, odim=len(lang_set)))
    # Additional check to verify that all the registered
    # parameters actually require gradients.  This allows us
    # to identify the "bug" in the embedding module.
    assert all(param.requires_grad is True for param in self.params())
def __init__(self, alphabet: set, emb_size: int):
    """Create a random embedding dictionary.

    Arguments:
    * alphabet: set of symbols to embed (characters, words, POS tags, ...)
    * emb_size: embedding size (each symbol is mapped to a vector
        of size emb_size)
    """
    self.emb_size = emb_size
    self.enc = Encoding(alphabet)
    self.emb = nn.EmbeddingBag(self.enc.class_num, emb_size, mode='sum')
def __init__(self, *args, **kwargs):
    font = kwargs.get('font')
    if font == 'Symbol':
        self.encoding = Encoding('SymbolEncoding')
    elif font == 'ZapfDingbats':
        self.encoding = Encoding('ZapfDingbatsEncoding')
    else:
        self.encoding = None
    self.basefont = font
    self.tounicode = None
def init_encoding():
    encoding = Encoding(peer_id, G, 'encoding', nb_bits_aspath, 5, max_depth, output=True)
    encoding.compute_encoding()
    peer_logger.info(
        str(int(bgp_msg['time'])) + '\t' + str(len(rib)) + '\t'
        + str(len(W_queue)) + '\t' + 'Encoding computed!')
    return encoding
def test_update(self):
    test_deque = deque([set(s) for s in self.test_sets])  # copy to prevent modification
    Encoding._update(5, test_deque, self.test_sequence, 5)
    expected_0 = set([20 + 15, 20 + 25, 20 + 47, 20 + 40])
    expected_1 = set([15 + 25, 15 + 47, 15 + 40])
    expected_2 = set([25 + 47, 25 + 40])
    expected_3 = set([47 + 40])
    self.assertEqual(len(test_deque), 4)
    self.assertEqual(test_deque[0], expected_0)
    self.assertEqual(test_deque[1], expected_1)
    self.assertEqual(test_deque[2], expected_2)
    self.assertEqual(test_deque[3], expected_3)
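# The test above pins down the behaviour of Encoding._update: the sliding
# window moves forward by one element, the pair sums of the dropped element
# are discarded, and pair sums with the newly seen value are added.  The
# following is a hedged sketch of an implementation consistent with that test,
# not the project's own code; the argument names are assumptions, and `sums`
# is assumed to be a collections.deque of sets as built by _preprocess.
def _update(preamble, sums, sequence, index):
    """Slide the window of pairwise sums one step forward (sketch)."""
    # Drop the pair sums of the element that leaves the window.
    sums.popleft()
    new_value = sequence[index]
    # Extend each remaining element's set with its sum with the new value.
    first = index - preamble + 1
    for offset, pair_sums in enumerate(sums):
        pair_sums.add(sequence[first + offset] + new_value)
    # The element just before the new value starts its own singleton set.
    sums.append({sequence[index - 1] + new_value})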
def run(args):
    data = read_data(args.corpus_file)
    chars = list(set(data))
    data_size, vocab_size = len(data), len(chars)
    print("Data has {} characters; {} unique.".format(data_size, vocab_size))
    e = Encoding(chars)
    model = VanillaRNN(encoding=e,
                       input_size=vocab_size,
                       hidden_size=args.hidden_size,
                       sequence_length=args.sequence_length,
                       learning_rate=args.learning_rate)
    model.fit(data, num_iterations=args.num_iterations)
    with open(get_output_file(args), 'w') as f:
        for _ in range(args.num_samples):
            seed = np.random.randint(low=0, high=vocab_size)
            seq = model.generate_sequence(seed, args.sample_size)
            f.write(e.decode(seq))
            f.write('\n\n')
def init_encoding():
    encoding = Encoding(peer_id, G, 'encoding', nb_bits_aspath, 5, output=True)
    encoding.compute_encoding()
    peer_logger.info(
        str(int(bgp_msg.time)) + '\t' + str(len(rib)) + '\t'
        + str(len(W_queue)) + '\t' + 'Encoding computed!')
    if global_rib_enabled:
        for p in rib.rib:
            send_fake_update(p, peer_ip, bgp_msg.time, rib, encoding, socket)
    return encoding
def __init__(self, data_set: DataSet, emb_size: int, hid_size: int):
    """Initialize the language recognition module.

    Args:
        data_set: the dataset from which the set of input symbols
            and output classes (languages) can be extracted
        emb_size: size of the character embedding vectors
        hid_size: size of the hidden layer of the FFN used for scoring
    """
    char_set = char_set_in(data_set)
    # Embedding
    self.register("emb", Embedding(char_set, emb_size))
    lang_set = set(lang for (name, lang) in data_set)
    lang_num = len(lang_set)
    # Encoding (mapping between langs and ints)
    self.enc = Encoding(lang_set)
    # FFN
    self.register("ffn", FFN(idim=emb_size, hdim=hid_size, odim=lang_num))
def test2(self):
    backend = Aer.get_backend('statevector_simulator')
    get_result = lambda circuit: execute(circuit, backend).result().get_statevector()
    r1 = get_result(
        Encoding(np.array(state_default), 'dc_amplitude_encoding').qcircuit)
    r2 = get_result(gen_circuit(gen_angles(state_default)))
    assert np.allclose(r2, r1, rtol=0.01)
class EmbeddingSum(Module):
    """A lookup table that stores embeddings of a fixed dictionary and size,
    combined with summing (CBOW).

    EmbeddingSum is an optimized variant of the Embedding class combined
    with summing (CBOW).  It is intended to be used over bags of features
    rather than single features.  It is based on torch.nn.EmbeddingBag
    (look it up in PyTorch docs).

    >>> import torch
    >>> symset = set(['a', 'b', 'c'])
    >>> emb = EmbeddingSum(symset, emb_size=10)
    >>> emb.forward(['a', 'b'])  #doctest: +ELLIPSIS
    tensor(...)
    >>> emb.forward(['a', 'b']).shape
    torch.Size([10])
    """

    def __init__(self, alphabet: set, emb_size: int):
        """Create a random embedding dictionary.

        Arguments:
        * alphabet: set of symbols to embed (characters, words, POS tags, ...)
        * emb_size: embedding size (each symbol is mapped to a vector
            of size emb_size)
        """
        self.emb_size = emb_size
        self.enc = Encoding(alphabet)
        self.emb = nn.EmbeddingBag(self.enc.class_num, emb_size, mode='sum')

    def forward(self, syms: Iterator) -> TT:
        """Embed the given bag (sequence) of symbols and compute the sum.

        Returns:
            Single vector, which is the sum of the embeddings of the given
            symbols.
        """
        ixs = []
        for sym in syms:
            try:
                ixs.append(self.enc.encode(sym))
            except KeyError:
                pass
        if len(ixs) > 0:
            ix_tensor = torch.LongTensor(ixs).view(1, -1)
            return self.emb(ix_tensor)[0]
        else:
            return torch.zeros(self.emb_size)

    def params(self):
        """The list of parameters of the embedding dictionary."""
        return [self.emb.weight]
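# Hedged sketch, not part of the class above: one way EmbeddingSum could be
# extended to embed a whole batch of bags at once, using the `offsets`
# argument of torch.nn.EmbeddingBag.  It assumes the same `self.enc` and
# `self.emb` attributes as EmbeddingSum and that no bag is empty after
# filtering unknown symbols; the method name `forwards` is an assumption.
def forwards(self, bags) -> TT:
    """Embed several bags of symbols; returns a (len(bags), emb_size) tensor."""
    ixs, offsets = [], []
    for bag in bags:
        # Each bag starts where the previous one ended.
        offsets.append(len(ixs))
        for sym in bag:
            try:
                ixs.append(self.enc.encode(sym))
            except KeyError:
                pass  # skip out-of-alphabet symbols, as in forward
    ix_tensor = torch.LongTensor(ixs)
    off_tensor = torch.LongTensor(offsets)
    return self.emb(ix_tensor, off_tensor)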
def to_utf8(self, params):
    if isinstance(self.encoding, basestring):
        raise self.UnsupportedFontEncodingException
    if isinstance(params, basestring):
        # Translate the bytestream into a UTF-8 string.
        # If an encoding hasn't been specified, assume the text using this
        # font is in Adobe Standard Encoding.
        enc = self.encoding or Encoding('StandardEncoding')
        return enc.to_utf8(params, self.tounicode)
    elif isinstance(params, (list, tuple)):
        return map(lambda param: self.to_utf8(param), params)
    else:
        return params
def __init__(self, num_blocks=[2, 2, 2, 2], num_classes=10, block=nn2.Bottleneck):
    super(Net, self).__init__()
    if block == nn2.Basicblock:
        self.expansion = 1
    else:
        self.expansion = 4
    self.inplanes = 64
    num_planes = [64, 128, 256, 512]
    strides = [1, 2, 2, 2]
    model = []
    # Conv_1
    model += [
        nn.Conv2d(3, self.inplanes, kernel_size=3, padding=1),
        nn.BatchNorm2d(self.inplanes),
        nn.ReLU(inplace=True)
    ]
    # Residual units
    for i in range(4):
        model += [
            self._residual_unit(block, num_planes[i], num_blocks[i], strides[i])
        ]
    # Last conv layer
    # TODO norm layer, instance norm?
    model += [
        nn.BatchNorm2d(self.inplanes),
        nn.ReLU(inplace=True),
        Encoding(D=512 * self.expansion, K=16),
        nn.BatchNorm1d(16),
        nn.ReLU(inplace=True),
        nn2.View(-1, 512 * self.expansion * 16),
        nn.Linear(512 * self.expansion * 16, num_classes)
    ]
    self.model = nn.Sequential(*model)
    print(model)
def setUp(self):
    self.encoder = Encoding()
class Test_Encoding(unittest.TestCase):

    def setUp(self):
        self.encoder = Encoding()

    def tearDown(self):
        self.encoder = None

    def test_single_character(self):
        decimal = self.encoder.encode_decimal("A")
        self.assertEqual(decimal, 16777217)
        hex_value = self.encoder.encode_hex("A")
        self.assertEqual(int(hex_value, 16), int('0x01000001', 16))

    def test_full_bundle(self):
        decimal = self.encoder.encode_decimal("FRED")
        self.assertEqual(decimal, 251792692)
        hex_value = self.encoder.encode_hex("FRED")
        self.assertEqual(int(hex_value, 16), int('0x0F020d34', 16))

    def test_non_alphanumerics(self):
        decimal = self.encoder.encode_decimal(" :^)")
        self.assertEqual(decimal, 79094888)
        hex_value = self.encoder.encode_hex(" :^)")
        self.assertEqual(int(hex_value, 16), int('0x04B6E468', 16))

    def test_foo(self):
        decimal = self.encoder.encode_decimal("foo")
        self.assertEqual(decimal, 124807030)

    def test_foo_with_space(self):
        decimal = self.encoder.encode_decimal(" foo")
        self.assertEqual(decimal, 250662636)

    def test_foot(self):
        decimal = self.encoder.encode_decimal("foot")
        self.assertEqual(decimal, 267939702)

    def test_BIRD(self):
        decimal = self.encoder.encode_decimal("BIRD")
        self.assertEqual(decimal, 251930706)

    def test_periods(self):
        decimal = self.encoder.encode_decimal("....")
        self.assertEqual(decimal, 15794160)

    def test_carrots(self):
        decimal = self.encoder.encode_decimal("^^^^")
        self.assertEqual(decimal, 252706800)

    def test_Whoot(self):
        decimal = self.encoder.encode_decimal("Woot")
        self.assertEqual(decimal, 266956663)

    def test_no(self):
        decimal = self.encoder.encode_decimal("no")
        self.assertEqual(decimal, 53490482)

    def test_email(self):
        decimal = self.encoder.encode_decimal("a@b.")
        self.assertEqual(decimal, 131107009)

    def test_my_email(self):
        decimal = self.encoder.encode_decimal("me@a")
        self.assertEqual(decimal, 263197451)

    # ----------- Part 2 ----------------------------

    def test_endcode_array_tacocat(self):
        encoded = self.encoder.encode("tacocat")
        self.assertEqual(encoded, [267487694, 125043731])

    def test_decode_FRED(self):
        decoded = self.encoder.decode_decimal(251792692)
        self.assertEqual(decoded, "FRED")

    def test_decode_array_tacocat(self):
        decoded = self.encoder.decode([267487694, 125043731])
        self.assertEqual(decoded, "tacocat")

    def test_decode_array_never_odd(self):
        decoded = self.encoder.decode(
            [267657050, 233917524, 234374596, 250875466, 17830160])
        self.assertEqual(decoded, "never odd or even")

    def test_decode_array_larger(self):
        decoded = self.encoder.decode(
            [267394382, 167322264, 66212897, 200937635, 267422503])
        self.assertEqual(decoded, "lager, sir, is regal")

    def test_decode_array_go_hang(self):
        decoded = self.encoder.decode([
            200319795, 133178981, 234094669, 267441422, 78666124,
            99619077, 267653454, 133178165, 124794470
        ])
        self.assertEqual(decoded, "go hang a salami, I'm a lasagna hog")

    def test_decode_array_engad(self):
        decoded = self.encoder.decode([
            267389735, 82841860, 267651166, 250793668, 233835785,
            267665210, 99680277, 133170194, 124782119
        ])
        self.assertEqual(decoded, "egad, a base tone denotes a bad age")

    def test_bothways(self):
        self.assertEqual("bothways",
                         self.encoder.decode(self.encoder.encode("bothways")))
class LangRec(Module):

    # TODO: Implement this method.
    def __init__(self, data_set: DataSet, emb_size: int, hid_size: int):
        """Initialize the language recognition module.

        Args:
            data_set: the dataset from which the set of input symbols
                and output classes (languages) can be extracted
            emb_size: size of the character embedding vectors
            hid_size: size of the hidden layer of the FFN used for scoring
        """
        char_set = char_set_in(data_set)
        # Embedding
        self.register("emb", Embedding(char_set, emb_size))
        lang_set = set(lang for (name, lang) in data_set)
        lang_num = len(lang_set)
        # Encoding (mapping between langs and ints)
        self.enc = Encoding(lang_set)
        # FFN
        self.register("ffn", FFN(idim=emb_size, hdim=hid_size, odim=lang_num))

    # TODO: Implement this method.
    def encode(self, lang: Lang) -> int:
        """Encode the given language as an integer."""
        return self.enc.encode(lang)

    # TODO: Implement this method.
    def forward(self, name: Name) -> TT:
        """The forward calculation of the name's language recognition model.

        Args:
            name: a person name

        Returns:
            score vector corresponding to the name, with its individual
            elements corresponding to the scores of different languages
        """
        embeddings = [self.emb.forward(char) for char in name]
        cbow = sum(embeddings)
        scores = self.ffn.forward(cbow)
        return scores

    # TODO: Implement this method.
    def classify(self, name: Name) -> Dict[Lang, float]:
        """Classify the given person name.

        Args:
            name: person name, sequence of characters

        Returns:
            the mapping from languages to their probabilities
            for the given name.
        """
        # We don't want Pytorch to calculate the gradients
        with torch.no_grad():
            # The vector of scores for the given name
            scores = self.forward(name)
            # We map the vector of scores to the vector of probabilities.
            probs = torch.softmax(scores, dim=0)
            # Result dictionary
            res = {}
            # `ix` should be an index in the scores vector
            for ix in range(len(probs)):
                lang = self.enc.decode(ix)
                res[lang] = probs[ix]
            return res
def test_part_two(self):
    self.assertEqual(Encoding.part_two('test_input', 5, 127), 62)
def test_sum_to_target(self):
    enc = Encoding()
    result = enc._sum_to_target(self.test_sequence, 127)
    expect = self.test_sequence[2:6]
    self.assertEqual(result, expect)
class LangRec(Module):

    def __init__(self, data_set: DataSet, ngram_size: int, emb_size: int, hid_size: int):
        """Initialize the language recognition module.

        Args:
            data_set: the dataset from which the set of input symbols
                and output classes (languages) can be extracted
            ngram_size: size of n-gram features (e.g., use 1 for unigrams,
                2 for bigrams, etc.)
            emb_size: size of the character embedding vectors
            hid_size: size of the hidden layer of the FFN used for scoring
        """
        # Keep the size of the ngrams
        self.ngram_size = ngram_size
        # Calculate the embedding alphabet and create the embedding sub-module
        feat_set = self.alphabet(data_set)
        self.register("emb", EmbeddingSum(feat_set, emb_size))
        # Encoding (mapping between langs and ints)
        lang_set = set(lang for (_, lang) in data_set)
        self.enc = Encoding(lang_set)
        # Scoring FFN sub-module
        self.register("ffn", FFN(idim=emb_size, hdim=hid_size, odim=len(lang_set)))
        # Additional check to verify that all the registered
        # parameters actually require gradients.
        assert all(param.requires_grad is True for param in self.params())

    def preprocess(self, name: Name) -> Name:
        """Name preprocessing."""
        # Currently no preprocessing, but we could think of something
        # in the future.
        return name

    def features(self, name: Name) -> Iterator[str]:
        """Retrieve the list of features in the given name."""
        return ngrams(self.preprocess(name), self.ngram_size)

    def alphabet(self, data_set: DataSet) -> Set[str]:
        """Retrieve the embedding alphabet from the dataset.

        Retrieve the set of all features that we want to embed from
        the given dataset.
        """
        return set(feat for (name, _) in data_set for feat in self.features(name))

    def encode(self, lang: Lang) -> int:
        """Encode the given language as an integer."""
        return self.enc.encode(lang)

    def forward(self, names: Iterator[Name]) -> TT:
        """The forward calculation of the name's language recognition model.

        Args:
            names: a sequence of person names; calculating the scores for
                several names at the same time is faster thanks to better
                parallelization

        Returns:
            score matrix in which each row corresponds to a single name,
            with its individual elements corresponding to the scores of
            different languages
        """
        # TODO EX2 (a): the following lines need to be adapted to the
        # EmbeddingSum, which processes features in groups.  You will also
        # need to make trivial modifications in the code in two or three
        # other places (imports, initialization).
        # TODO EX2 (b): you can further try to modify the EmbeddingSum class
        # so that it works over batches of feature groups.
        embeddings = [
            # [self.emb.forward(feat) for feat in self.features(name)]
            self.emb.forward(self.features(name))
            for name in names
        ]
        # cbow = utils.from_rows(map(sum, embeddings))
        cbow = utils.stack(embeddings)
        scores = self.ffn.forward(cbow)
        return scores

    def classify(self, name: Name) -> Dict[Lang, float]:
        """Classify the given person name.

        Args:
            name: person name, sequence of characters

        Returns:
            the mapping from languages to their probabilities
            for the given name.
        """
        # We don't want Pytorch to calculate the gradients
        with torch.no_grad():
            # The vector of scores for the given name
            scores = self.forward([name])[0]
            # We map the vector of scores to the vector of probabilities.
            probs = torch.softmax(scores, dim=0)
            # Result dictionary
            res = {}
            # `ix` should be an index in the scores vector
            for ix in range(len(probs)):
                lang = self.enc.decode(ix)
                res[lang] = probs[ix]
            return res

    def classify_one(self, name: Name) -> Lang:
        """A simplified version of `classify` which returns the language
        with the highest score."""
        prob_map = self.classify(name)
        preds = sorted(prob_map.items(), key=lambda pair: pair[1])
        (name, _prob) = preds[-1]
        return name
def test_preprocess(self):
    result = Encoding._preprocess([35, 20, 15, 25, 47])
    self.assertEqual(len(result), 4)
    self.assertEqual(len(result[0]), 4)
    self.assertEqual(len(result[3]), 1)
    self.assertEqual(result[0], self.test_sets[0])
def test_parse_input(self):
    self.assertEqual(Encoding._parse_input('test_input'), self.test_sequence)
class LangRec(Module):

    def __init__(self, data_set: DataSet, ngram_size: int, emb_size: int, hid_size: int):
        """Initialize the language recognition module.

        Args:
            data_set: the dataset from which the set of input symbols
                and output classes (languages) can be extracted
            ngram_size: size of n-gram features (e.g., use 1 for unigrams,
                2 for bigrams, etc.)
            emb_size: size of the character embedding vectors
            hid_size: size of the hidden layer of the FFN used for scoring
        """
        # Keep the size of the ngrams
        self.ngram_size = ngram_size
        # Calculate the embedding alphabet and create the embedding sub-module
        feat_set = self.alphabet(data_set)
        self.register("emb", Embedding(feat_set, emb_size))
        # Encoding (mapping between langs and ints)
        lang_set = set(lang for (_, lang) in data_set)
        self.enc = Encoding(lang_set)
        # Scoring FFN sub-module
        self.register("ffn", FFN(idim=emb_size, hdim=hid_size, odim=len(lang_set)))
        # Additional check to verify that all the registered
        # parameters actually require gradients.  This allows us
        # to identify the "bug" in the embedding module.
        assert all(param.requires_grad is True for param in self.params())

    def preprocess(self, name: Name) -> Name:
        """Name preprocessing."""
        # Currently no preprocessing, but we could think of something
        # in the future.
        return name

    def features(self, name: Name) -> Iterator[str]:
        """Retrieve the list of features in the given name."""
        return ngrams(self.preprocess(name), self.ngram_size)

    def alphabet(self, data_set: DataSet) -> Set[str]:
        """Retrieve the embedding alphabet from the dataset.

        Retrieve the set of all features that we want to embed from
        the given dataset.
        """
        return set(feat for (name, _) in data_set for feat in self.features(name))

    def encode(self, lang: Lang) -> int:
        """Encode the given language as an integer."""
        return self.enc.encode(lang)

    def forward(self, name: Name) -> TT:
        """The forward calculation of the name's language recognition model.

        Args:
            name: a person name

        Returns:
            score vector corresponding to the name, with its individual
            elements corresponding to the scores of different languages
        """
        embeddings = [self.emb.forward(feat) for feat in self.features(name)]
        cbow = sum(embeddings)
        scores = self.ffn.forward(cbow)
        return scores

    def classify(self, name: Name) -> Dict[Lang, float]:
        """Classify the given person name.

        Args:
            name: person name, sequence of characters

        Returns:
            the mapping from languages to their probabilities
            for the given name.
        """
        # We don't want Pytorch to calculate the gradients
        with torch.no_grad():
            # The vector of scores for the given name
            scores = self.forward(name)
            # We map the vector of scores to the vector of probabilities.
            probs = torch.softmax(scores, dim=0)
            # Result dictionary
            res = {}
            # `ix` should be an index in the scores vector
            for ix in range(len(probs)):
                lang = self.enc.decode(ix)
                res[lang] = probs[ix]
            return res

    def classify_one(self, name: Name) -> Lang:
        """A simplified version of `classify` which returns the language
        with the highest score."""
        prob_map = self.classify(name)
        preds = sorted(prob_map.items(), key=lambda pair: pair[1])
        (name, _prob) = preds[-1]
        return name
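# Hedged usage sketch of the LangRec module defined above (illustrative only;
# the toy data set and hyper-parameter values are assumptions, while DataSet,
# LangRec, and its methods come from the snippets in this file):
#
#     data_set = [("Smith", "English"), ("Dubois", "French")]
#     model = LangRec(data_set, ngram_size=2, emb_size=10, hid_size=20)
#     list(model.features("Smith"))   # character bigrams, e.g. "Sm", "mi", ...
#     model.classify("Smith")         # {"English": ..., "French": ...} (untrained scores)
#     model.classify_one("Smith")     # language with the highest probability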
def test_validate(self):
    test_deque = deque(self.test_sets)
    self.assertTrue(Encoding._validate(55, test_deque))
    self.assertFalse(Encoding._validate(1000, test_deque))
    self.assertTrue(Encoding._validate(25 + 47, test_deque))
def test_part_one(self):
    self.assertEqual(Encoding.part_one('test_input', 5), 127)
from ctfs import CTFs
from fun import Fun
from discord.ext.commands import Bot
import discord
import asyncio
import os

# app_id = '519995591359594538'
TOKEN = os.environ["DISCORD_TOKEN"]
BOT_PREFIX = (".", "dad ")

client = Bot(command_prefix=BOT_PREFIX)
client.add_cog(Hashing(client))
client.add_cog(Encoding(client))
client.add_cog(Ciphers(client))
client.add_cog(Utilities(client))
client.add_cog(CTFs(client))
client.add_cog(Fun(client))


@client.event
async def on_ready():
    print("--------Logged in as " + client.user.name + "-----------")


@client.event
async def on_message(message):
    # print("Command received: " + str(message.content))
    await client.process_commands(message)
# Note: in this excerpt the training loop appears before the models it uses;
# it is presumably the body of the `train` function invoked at the end, so it
# is wrapped that way here (an assumption, not confirmed by the source).
def train(enc, dec):
    for file in os.listdir("./data/color"):
        if file.endswith(".jpg"):
            print(os.path.join(file))
            img = image.load_img("./data/color/" + file,
                                 target_size=(img_height, img_height))
            # imgG = image.load_img("./data/gray/" + file,
            #                       target_size=(img_height, img_height))
            x = image.img_to_array(img)
            x = np.expand_dims(x, axis=0)
            # y = image.img_to_array(imgG)
            # y = np.expand_dims(y, axis=0)
            enc.fit(x, x)
            dec.fit(enc.predict(x), x)


image_tensor = layers.Input(shape=(512, 512, 3))
encoding_output = Encoding(image_tensor)
enc = models.Model(inputs=[image_tensor], outputs=[encoding_output])
decoding_output = Decoding(image_tensor)
dec = models.Model(inputs=[image_tensor], outputs=[decoding_output])
enc.compile(optimizer='sgd', loss=lossFunction, metrics=['mse'])
dec.compile(optimizer='sgd', loss=lossFunction, metrics=['mse'])
# model.compile(optimizer='adam',
#               loss=tf.keras.losses.MeanSquaredError(),
#               metrics=['mse'])

train(enc, dec)