def decoder(text, context, enc, model):
    """Recover the hidden message embedded in covertext `text`.

    Mirrors `encoder`: first extracts the bit stream from the covertext using
    the configured steganographic mode, then maps the bits back to the
    original plaintext string.

    Args:
        text: covertext string produced by `encoder`.
        context: plain-text context used when the covertext was generated
            (must match the encoder's context exactly).
        enc: GPT-2 tokenizer/encoder object.
        model: GPT-2 language model.

    Returns:
        The reconstructed plaintext string, or None when mode == 'sample'
        (sampling carries no message).
    """
    unicode_enc = True
    mode = 'arithmetic'
    block_size = 3   # for huffman and bins
    temp = 0.9       # for arithmetic
    precision = 26   # for arithmetic
    topk = 300

    context_tokens = encode_context(context, enc)
    if mode not in ['arithmetic', 'huffman', 'bins', 'sample']:
        raise NotImplementedError
    # FIX: the 'bins' branch below used bin2words/words2bin without ever
    # building them in this function (NameError); build them here the same
    # way main() does.
    if mode == 'bins':
        bin2words, words2bin = get_bins(len(enc.encoder), block_size)

    message_ctx = [enc.encoder['<|endoftext|>']]

    # Decode binary message from bits using the same arbitrary context
    if mode != 'sample':
        if mode == 'arithmetic':
            message_rec = decode_arithmetic(model, enc, text, context_tokens,
                                            temp=temp, precision=precision,
                                            topk=topk)
        elif mode == 'huffman':
            message_rec = decode_huffman(model, enc, text, context_tokens,
                                         block_size)
        elif mode == 'bins':
            message_rec = decode_block(model, enc, text, context_tokens,
                                       block_size, bin2words, words2bin)

        # Finally map message bits back to original text
        if unicode_enc:
            # Bits are raw UTF-8: pack them back into bytes and decode.
            message_rec = [bool(item) for item in message_rec]
            ba = bitarray.bitarray(message_rec)
            reconst = ba.tobytes().decode('utf-8', 'ignore')
        else:
            # Bits were produced by arithmetic decoding; invert via encoding
            # against the same fixed <|endoftext|> context.
            reconst = encode_arithmetic(model, enc, message_rec, message_ctx,
                                        precision=40, topk=60000)
            reconst = enc.decode(reconst[0])
        return reconst
def encoder(message_str, context, enc, model):
    """Hide `message_str` inside GPT-2 generated covertext.

    Pipeline: (1) turn the message into a uniform bit stream — either raw
    UTF-8 bits (unicode_enc) or via arithmetic decoding against a fixed
    context — then (2) embed the bits into text generated from `context`
    using the configured steganographic mode.

    Args:
        message_str: plaintext to hide.
        context: plain-text context the covertext is conditioned on.
        enc: GPT-2 tokenizer/encoder object.
        model: GPT-2 language model.

    Returns:
        The generated covertext string.
    """
    unicode_enc = True
    mode = 'arithmetic'
    block_size = 3       # for huffman and bins
    temp = 0.9           # for arithmetic
    precision = 26       # for arithmetic
    sample_tokens = 100  # for sample
    topk = 300
    finish_sent = True

    context_tokens = encode_context(context, enc)
    if mode not in ['arithmetic', 'huffman', 'bins', 'sample']:
        raise NotImplementedError
    # FIX: the 'bins' branch below passed bin2words/words2bin to encode_block
    # without ever building them in this function (NameError); build them
    # here the same way main() does.
    if mode == 'bins':
        bin2words, words2bin = get_bins(len(enc.encoder), block_size)

    # First encode message to uniform bits, without any context.
    if unicode_enc:
        ba = bitarray.bitarray()
        ba.frombytes(message_str.encode('utf-8'))
        message = ba.tolist()
    else:
        message_ctx = [enc.encoder['<|endoftext|>']]
        message_str += '<eos>'
        message = decode_arithmetic(model, enc, message_str, message_ctx,
                                    precision=40, topk=60000)

    # Next encode bits into cover text, using arbitrary context.
    Hq = 0
    if mode == 'arithmetic':
        out, nll, kl, words_per_bit, Hq = encode_arithmetic(
            model, enc, message, context_tokens, temp=temp,
            finish_sent=finish_sent, precision=precision, topk=topk)
    elif mode == 'huffman':
        out, nll, kl, words_per_bit = encode_huffman(
            model, enc, message, context_tokens, block_size,
            finish_sent=finish_sent)
    elif mode == 'bins':
        out, nll, kl, words_per_bit = encode_block(
            model, enc, message, context_tokens, block_size,
            bin2words, words2bin, finish_sent=finish_sent)
    elif mode == 'sample':
        # 'sample' only generates text; no message bits are embedded.
        out, nll, kl, Hq = sample(model, enc, sample_tokens, context_tokens,
                                  temperature=temp, topk=topk)
        words_per_bit = 1
    text = enc.decode(out)
    return text
def main(args):
    """End-to-end demo: hide a message in GPT-2 generated text, then recover it.

    NOTE(review): this function is redefined later in this file; at import
    time only the last definition of ``main`` is visible.
    """
    enc, model = get_model(model_name=args.lm)

    ## PARAMETERS
    message_str = (args.message if args.message != ""
                   else "This is a very secret message!")
    unicode_enc = False      # True: message bits are the raw UTF-8 bytes
    mode = args.mode         # steganography algorithm
    block_size = 3           # for huffman and bins
    temp = 0.9               # for arithmetic
    precision = 26           # for arithmetic
    sample_tokens = 100      # for sample
    topk = 300
    finish_sent = False      # whether or not to force finish sent. If so, stats displayed will be for non-finished sentence

    ## VALIDATE PARAMETERS
    if mode not in ['arithmetic', 'huffman', 'bins', 'sample']:
        raise NotImplementedError
    if mode == 'bins':
        bin2words, words2bin = get_bins(len(enc.encoder), block_size)

    context = args.context if args.context != "" else \
        """Washington received his initial military training and command with the Virginia Regiment during the French and Indian War. He was later elected to the Virginia House of Burgesses and was named a delegate to the Continental Congress, where he was appointed Commanding General of the nation's Continental Army. Washington led American forces, allied with France, in the defeat of the British at Yorktown. Once victory for the United States was in hand in 1783, Washington resigned his commission. 
"""
    context_tokens = encode_context(context, enc)
    # ------------------------------------------------------------------------------------
    # ------------------------------------------------------------------------------------
    # First encode message to uniform bits, without any context
    # (not essential this is arithmetic vs ascii, but it's more efficient when the message is natural language)
    if unicode_enc:
        ba = bitarray.bitarray()
        ba.frombytes(message_str.encode('utf-8'))
        message = ba.tolist()
    else:
        message_ctx = [enc.encoder['<|endoftext|>']]
        message_str += '<eos>'
        message = decode_arithmetic(model, enc, message_str, message_ctx,
                                    precision=40, topk=60000)

    # Next encode bits into cover text, using arbitrary context
    Hq = 0
    if mode == 'arithmetic':
        out, nll, kl, words_per_bit, Hq = encode_arithmetic(
            model, enc, message, context_tokens, temp=temp,
            finish_sent=finish_sent, precision=precision, topk=topk)
    elif mode == 'huffman':
        out, nll, kl, words_per_bit = encode_huffman(
            model, enc, message, context_tokens, block_size,
            finish_sent=finish_sent)
    elif mode == 'bins':
        out, nll, kl, words_per_bit = encode_block(
            model, enc, message, context_tokens, block_size,
            bin2words, words2bin, finish_sent=finish_sent)
    elif mode == 'sample':
        # 'sample' only generates text; no message bits are embedded.
        out, nll, kl, Hq = sample(model, enc, sample_tokens, context_tokens,
                                  temperature=temp, topk=topk)
        words_per_bit = 1
    text = enc.decode(out)
    print(message)
    print(len(message))
    print("=" * 40 + " Encoding " + "=" * 40)
    print(text)
    # Hq is in nats; dividing by ln(2) ~= 0.69315 converts it to bits.
    print(
        'ppl: %0.2f, kl: %0.3f, words/bit: %0.2f, bits/word: %0.2f, entropy: %.2f'
        % (math.exp(nll), kl, words_per_bit, 1 / words_per_bit, Hq / 0.69315))

    # Decode binary message from bits using the same arbitrary context
    if mode != 'sample':
        if mode == 'arithmetic':
            message_rec = decode_arithmetic(model, enc, text, context_tokens,
                                            temp=temp, precision=precision,
                                            topk=topk)
        elif mode == 'huffman':
            message_rec = decode_huffman(model, enc, text, context_tokens,
                                         block_size)
        elif mode == 'bins':
            message_rec = decode_block(model, enc, text, context_tokens,
                                       block_size, bin2words, words2bin)
        print("=" * 40 + " Recovered Message " + "=" * 40)
        print(message_rec)
        print("=" * 80)

        # Finally map message bits back to original text
        if unicode_enc:
            message_rec = [bool(item) for item in message_rec]
            ba = bitarray.bitarray(message_rec)
            reconst = ba.tobytes().decode('utf-8', 'ignore')
        else:
            reconst = encode_arithmetic(model, enc, message_rec, message_ctx,
                                        precision=40, topk=60000)
            reconst = enc.decode(reconst[0])
        print(reconst)
def main(args):
    """Interactive loop: embed every upper/lower-case variant of a name into
    GPT-2 covertext, then extract and reconstruct it again.

    NOTE(review): this redefines the ``main`` defined earlier in this file;
    only this definition is visible after import.
    """
    # Initial process
    args = vars(args)
    unicode_enc = args['unicode_enc']    # how the message is turned into bits
    mode = args['mode']                  # steganography algorithm
    block_size = args['block_size']      # stego parameter (huffman/bins block size)
    temp = args['temp']                  # stego TEMPERATURE; avoid creating a new `temp` variable below
    precision = args['precision']        # stego parameter
    topk = args['topk']                  # text-generation parameter
    device = args['device']              # GPU/CPU selection for generation, default 'cuda'
    finish_sent = args['finish_sent']    # stego parameter
    nucleus = args['nucleus']            # saac stego parameter
    delta = args['delta']                # saac stego parameter
    model_name = args['language_model']  # language model used for generation
    context_file = args['context_file']  # path of the context file
    message_str = args['name']
    # sample_tokens = 100  # test variable

    # PARAMETERS: default secret message (a person's name) for the first pass
    # message_str = "Chhenl"  # string to be hidden.

    # VALIDATE PARAMETERS: supported steganography algorithms
    if mode not in ['arithmetic', 'huffman', 'bins', 'saac']:
        raise NotImplementedError

    # Print the secret message (name)
    print("Default plain_text is ", message_str)

    # Read the context
    f = open(context_file, 'r', encoding='utf-8')
    context = f.read()
    f.close()
    print("sample context is ", context)  # related to the text generation procedure.

    # Load the text-generation model
    print("loading GPT-2 LM to GPU")
    enc, model = get_model(model_name=model_name)
    print("finish loading !")
    print("implication of {}".format(mode))

    # Setup for the 'bins' algorithm
    if mode == 'bins':
        bin2words, words2bin = get_bins(len(enc.encoder), block_size)
    # Setup for the 'saac' algorithm
    if delta and mode == "saac":
        nucleus = 2**(-1.0 * delta)

    # The comments below are leftovers from earlier debugging sessions.
    # fix situation: directly encode the text.
    # print("directly encode the plain txt:\n", enc.encode(message_str))
    # print("Decode back:\n", enc.decode(enc.encode(message_str)))
    # can ensure the problem arise in the arithmetic_decode as well as the arithmetic_encode function.
    # ----------------------start test----------------------------
    # test_str = "hello world."
    # print("test_str = ", test_str)
    # out = enc.encode(test_str)
    # print("out = ", out)
    # decode_str = enc.decode(out)
    # print("decode_str = ", decode_str)
    # print("enc.encode(decode_str) = ", enc.encode(decode_str))
    # ----------------------stop test-----------------------------

    # Archive Basic Initialization----------------------------------
    # print("plain_text is {}".format(message_str))
    # unicode_enc = False
    # mode = 'huffman'
    # block_size = 3 # for huffman and bins
    # temp = 0.9 # for arithmetic
    # precision = 26 # for arithmetic
    # sample_tokens = 100 # for sample, delete sample
    # topk = 300
    # device = 'cuda'
    # finish_sent=False # whether or not to force finish sent. If so, stats displayed will be for non-finished sentence
    # nucleus = 0.95
    # Archive Basic Initialization----------------------------------

    first_flag = 1  # marks the first pass, which uses the default message
    context_tokens = encode_context(context, enc)  # language-model encoding of the context
    while (1):
        # Loops forever, prompting for a new secret message (name) each round.
        # ------------------------------------------------------------------------------------
        # list_for_bpw = []  # for computing the Bits/word metric
        # list_for_DKL = []  # for computing the KL metric
        # list_for_seq = []  # for labeling
        if first_flag == 0:
            message_str = input("Please reenter a new plaintext:")
        # output_amount = len(message_str)
        # Build the set of upper/lower-case variants of the name
        message_str = message_str.upper()
        arr = list(message_str)
        generated_array = dfs(arr, 0, [])
        first_flag = 0
        covertext_list = []
        for temp_count in range(0, len(generated_array)):
            # First encode message to uniform bits, without any context
            # (not essential this is arithmetic vs ascii, but it's more efficient when the message is natural language)
            # if temp_count > 10:
            #     break  # debug limit: restricts output to 10 covertexts
            print("=" * 80)
            print("Altering the #{} msg_str:".format(temp_count), message_str)
            message_str = generated_array[temp_count]  # pick one case-variant (e.g. KiErAn)
            # Produce `message`, i.e. the bit stream described above
            if unicode_enc:
                ba = bitarray.bitarray()
                ba.frombytes(message_str.encode('utf-8'))
                message = ba.tolist()
            else:
                message_ctx = [enc.encoder['<|endoftext|>']]
                message_str += '<eos>'
                message = decode_arithmetic(model, enc, message_str,
                                            message_ctx, precision=40,
                                            topk=60000)
            # print("First encode the text to a bit sequence!")
            # print(message)  # the binary stream. text--arithmetic-->binary stream
            # print("the length is {}".format(len(message)))

            # Next encode bits into cover text, using arbitrary context
            # Embedding: hide the bit stream in generated text; decoding `out`
            # with the GPT-2 tokenizer yields the COVERTEXT.
            Hq = 0
            if mode == 'arithmetic':
                out, nll, kl, words_per_bit, Hq = encode_arithmetic(
                    model, enc, message, context_tokens, temp=temp,
                    finish_sent=finish_sent, precision=precision, topk=topk)
            elif mode == 'huffman':
                out, nll, kl, words_per_bit = encode_huffman(
                    model, enc, message, context_tokens, block_size,
                    finish_sent=finish_sent)
            elif mode == 'bins':
                out, nll, kl, words_per_bit = encode_block(
                    model, enc, message, context_tokens, block_size,
                    bin2words, words2bin, finish_sent=finish_sent)
            elif mode == 'saac':
                out, nll, kl, words_per_bit, Hq, topk_list, case_studies = encode_saac(
                    model, enc, message, context_tokens, device=device,
                    temp=temp, precision=precision, topk=topk, nucleus=nucleus)
                # add thing contains device='cuda', temp=1.0, precision=26, topk=50, nucleus=0.95.
            covertext = enc.decode(out)
            covertext_list.append(covertext)  # keep every covertext for later use
            # list_for_bpw.append(1/words_per_bit)  # for metric computation
            # list_for_DKL.append(kl)  # for metric computation
            # list_for_seq.append(temp_count)
            # print("="*40 + " Encoding " + "="*40)
            # Print the covertext; it can be harvested from here.
            print(
                '#{} generated covertext:\n'.format(temp_count), covertext
            )  # covertext. generated covertext that contains secret information.
            # Hq is in nats; dividing by ln(2) ~= 0.69315 converts it to bits.
            print(
                'ppl: %0.2f, kl: %0.3f, words/bit: %0.2f, bits/word: %0.2f, entropy: %.2f'
                % (math.exp(nll), kl, words_per_bit, 1 / words_per_bit,
                   Hq / 0.69315))

            # -----------------------------------------------------------------------------------
            # Extraction: recover the bit stream MESSAGE_REC from the covertext
            # Decode binary message from bits using the same arbitrary context
            # The lines below may be needed later; the receiver would enter
            # their own name and the covertext for the check.
            # input_name = input("Please input ur name:")
            # input_covertext = input("Please input the covertext:")
            # covertext = input_covertext
            if mode == 'arithmetic':
                message_rec = decode_arithmetic(model, enc, covertext,
                                                context_tokens, temp=temp,
                                                precision=precision, topk=topk)
            elif mode == 'huffman':
                message_rec = decode_huffman(model, enc, covertext,
                                             context_tokens, block_size)
            elif mode == 'bins':
                message_rec = decode_block(model, enc, covertext,
                                           context_tokens, block_size,
                                           bin2words, words2bin)
            elif mode == 'saac':
                message_rec = decode_saac(model, enc, covertext,
                                          context_tokens, device=device,
                                          temp=temp, precision=precision,
                                          topk=topk, nucleus=nucleus)
            # print("="*40 + " Recovered Message " + "="*40)
            # print(message_rec)  # binary stream extracted from stego_text.
            # print("=" * 80)

            # Finally map message bits back to original text
            # Decode the bit stream; `reconst` is the final extraction result,
            # normally the hidden name.
            if unicode_enc:
                message_rec = [bool(item) for item in message_rec]
                ba = bitarray.bitarray(message_rec)
                reconst = ba.tobytes().decode('utf-8', 'ignore')
            else:
                reconst = encode_arithmetic(model, enc, message_rec,
                                            message_ctx, precision=40,
                                            topk=60000)
                # reconst = encode_arithmetic(model, enc, message_rec, message_ctx, temp=temp, precision=precision, topk=topk)
                # print("reconst[0] is", format(reconst[0]))
                reconst = enc.decode(reconst[0])
            print("The decode text is ")
            print(reconst[0:-5]
                  )  # Decoded text. message_rec --arithmetic decode--> reconst
def encrypt(unicode_enc, mode, block_size, temp, precision, topk, device,
            finish_sent, model_name, delta, context, message_str,
            nucleus=0.95):
    """Interactively embed every case-variant of a name into covertext and
    dump each round's covertexts to a CSV file.

    Args:
        unicode_enc: if True, message bits are the raw UTF-8 bytes; otherwise
            bits come from arithmetic decoding against a fixed context.
        mode: steganography algorithm ('arithmetic'|'huffman'|'bins'|'saac').
        block_size: block size for huffman/bins.
        temp, precision, topk: generation/stego parameters.
        device: device string passed to the saac coder.
        finish_sent: whether to force-finish the sentence.
        model_name: GPT-2 variant to load.
        delta: saac parameter; when truthy, overrides `nucleus`.
        context: plain-text context the covertext is conditioned on.
        message_str: initial plaintext (name) for the first round.
        nucleus: saac nucleus value; new backward-compatible parameter.
            (FIX: previously `nucleus` was only assigned when `delta` was
            truthy, so mode == 'saac' with delta == 0 raised NameError.)

    Note: loops forever, prompting for a new plaintext each round.
    """
    print("loading GPT-2 LM to GPU")
    enc, model = get_model(model_name=model_name)
    print("finish loading !")
    print("implication of {}".format(mode))

    # FIX: validate mode up front; previously an unknown mode fell through to
    # enc.decode(out) with `out` undefined.
    if mode not in ['arithmetic', 'huffman', 'bins', 'saac']:
        raise NotImplementedError

    if mode == 'bins':
        bin2words, words2bin = get_bins(len(enc.encoder), block_size)
    if delta and mode == "saac":
        nucleus = 2**(-1.0 * delta)

    first_flag = 1  # first round uses the `message_str` argument
    context_tokens = encode_context(context, enc)
    while (1):
        sentence_assmble = []  # covertexts generated this round
        if first_flag == 0:
            message_str = input("Please reenter a new plaintext:")
        # Build the set of upper/lower-case variants of the name.
        message_str = message_str.upper()
        arr = list(message_str)
        generated_array = dfs(arr, 0, [])
        first_flag = 0
        for temp_count in range(0, len(generated_array)):
            # First encode message to uniform bits, without any context.
            print("=" * 80)
            print("Altering the #{} msg_str:".format(temp_count), message_str)
            message_str = generated_array[temp_count]
            if unicode_enc:
                ba = bitarray.bitarray()
                ba.frombytes(message_str.encode('utf-8'))
                message = ba.tolist()
            else:
                message_ctx = [enc.encoder['<|endoftext|>']]
                message_str += '<eos>'
                message = decode_arithmetic(model, enc, message_str,
                                            message_ctx, precision=40,
                                            topk=60000)
            # Next embed the bits into generated covertext.
            Hq = 0
            if mode == 'arithmetic':
                out, nll, kl, words_per_bit, Hq = encode_arithmetic(
                    model, enc, message, context_tokens, temp=temp,
                    finish_sent=finish_sent, precision=precision, topk=topk)
            elif mode == 'huffman':
                out, nll, kl, words_per_bit = encode_huffman(
                    model, enc, message, context_tokens, block_size,
                    finish_sent=finish_sent)
            elif mode == 'bins':
                out, nll, kl, words_per_bit = encode_block(
                    model, enc, message, context_tokens, block_size,
                    bin2words, words2bin, finish_sent=finish_sent)
                # FIX: removed a stray `words_per_bit = 1` that clobbered the
                # value returned by encode_block.
            elif mode == 'saac':
                out, nll, kl, words_per_bit, Hq, topk_list, case_studies = encode_saac(
                    model, enc, message, context_tokens, device=device,
                    temp=temp, precision=precision, topk=topk, nucleus=nucleus)
            text = enc.decode(out)
            print(
                '#{} generated covertext:\n'.format(temp_count), text
            )  # covertext. generated text that contains secret information.
            sentence_assmble.append(text)
        # NOTE(review): [0:-5] strips the '<eos>' appended above; this only
        # holds on the non-unicode path — confirm for unicode_enc=True.
        dataframe = pd.DataFrame({'Sentences': sentence_assmble})
        dataframe.to_csv("User_{}_Name_{}_Amount_{}.csv".format(
            random.randint(1, 10000), message_str.upper()[0:-5],
            len(generated_array)), index=False, sep=',')
def embed(unicode_enc=False,
          mode='saac',
          block_size=1,
          temp=0.9,
          precision=26,
          topk=300,
          device='cuda',
          finish_sent=False,
          nucleus=0.95,
          delta=0.01,
          model_name='gpt2',
          context_file='D:/OneDrive - whu.edu.cn/桌面/NeuralSteganography-master1/context.txt',
          name='Gogo'):
    """Generate covertexts that hide case-variants of `name`.

    Example: embed(mode='saac', name='Chhenl', temp=0.9)

    Returns:
        covertext_list: list of generated covertext strings.
    """
    # covertext_list collects the generated texts
    temp = float(temp)
    message_str = name
    # VALIDATE PARAMETERS: supported steganography algorithms
    if mode not in ['arithmetic', 'huffman', 'bins', 'saac']:
        raise NotImplementedError
    # Print the secret message (name)
    print("Plain_text is ", message_str)
    # Read the context
    f = open(context_file, 'r', encoding='utf-8')
    context = f.read()
    f.close()
    print("sample context is ", context)  # related to the text generation procedure.
    # Load the text-generation model
    print("loading GPT-2 LM to GPU")
    enc, model = get_model(model_name=model_name)
    print("finish loading !")
    print("implication of {}".format(mode))
    # Setup for the 'bins' algorithm
    if mode == 'bins':
        bin2words, words2bin = get_bins(len(enc.encoder), block_size)
    # Setup for the 'saac' algorithm
    if delta and mode == "saac":
        nucleus = 2 ** (-1.0 * delta)
    # first_flag = 1  # flag for the default handling below
    context_tokens = encode_context(context, enc)  # language-model encoding of the context
    # Build the set of upper/lower-case variants of the name
    message_str = message_str.upper()
    arr = list(message_str)
    generated_array = dfs(arr, 0, [])
    # first_flag = 0
    covertext_list = []
    for temp_count in range(0, len(generated_array)):
        # First encode message to uniform bits, without any context
        # (not essential this is arithmetic vs ascii, but it's more efficient when the message is natural language)
        # NOTE(review): stops after 2 variants, but the original comment said
        # the limit was 10 covertexts — confirm the intended debug limit.
        if temp_count > 1:
            break
        print("=" * 80)
        print("Altering the #{} msg_str:".format(temp_count), message_str)
        message_str = generated_array[temp_count]  # pick one case-variant (e.g. KiErAn)
        # Produce `message`, i.e. the bit stream described above
        if unicode_enc:
            ba = bitarray.bitarray()
            ba.frombytes(message_str.encode('utf-8'))
            message = ba.tolist()
        else:
            message_ctx = [enc.encoder['<|endoftext|>']]
            message_str += '<eos>'
            message = decode_arithmetic(model, enc, message_str, message_ctx,
                                        precision=40, topk=60000)
        # Next encode bits into cover text, using arbitrary context
        # Embedding: hide the bit stream in generated text; decoding `out`
        # with the GPT-2 tokenizer yields the COVERTEXT.
        Hq = 0
        if mode == 'arithmetic':
            out, nll, kl, words_per_bit, Hq = encode_arithmetic(
                model, enc, message, context_tokens, temp=temp,
                finish_sent=finish_sent, precision=precision, topk=topk)
        elif mode == 'huffman':
            out, nll, kl, words_per_bit = encode_huffman(
                model, enc, message, context_tokens, block_size,
                finish_sent=finish_sent)
        elif mode == 'bins':
            out, nll, kl, words_per_bit = encode_block(
                model, enc, message, context_tokens, block_size,
                bin2words, words2bin, finish_sent=finish_sent)
        elif mode == 'saac':
            out, nll, kl, words_per_bit, Hq, topk_list, case_studies = encode_saac(
                model, enc, message, context_tokens, device=device, temp=temp,
                precision=precision, topk=topk, nucleus=nucleus)
        covertext = enc.decode(out)
        covertext_list.append(covertext)  # keep every covertext for later use
        # Print the covertext; it can be harvested from here.
        print('#{} generated covertext:\n'.format(temp_count),
              covertext)  # covertext. generated covertext that contains secret information.
    return covertext_list
def extract(unicode_enc=False,
            mode='saac',
            block_size=1,
            temp=0.9,
            precision=26,
            topk=300,
            device='cuda',
            finish_sent=False,
            nucleus=0.95,
            delta=0.01,
            model_name='gpt2',
            context_file='D:/OneDrive - whu.edu.cn/桌面/NeuralSteganography-master1/context.txt',
            covertext='Hello',
            name='Gogo'):
    """Extract the hidden plaintext from `covertext` and check it against `name`.

    Example: extract(mode='saac', name='Chhenl', temp=0.9, covertext='Temp')

    Returns:
        1 if the recovered plaintext matches `name` (case-insensitive),
        0 otherwise.
    """
    temp = float(temp)
    # VALIDATE PARAMETERS: supported steganography algorithms
    if mode not in ['arithmetic', 'huffman', 'bins', 'saac']:
        raise NotImplementedError
    print("Cover_text is ", covertext)
    print("Target is ", name)
    # Read the context.
    # FIX: use a context manager so the file is closed even if reading raises.
    with open(context_file, 'r', encoding='utf-8') as f:
        context = f.read()
    print("sample context is ", context)  # related to the text generation procedure.
    # Load the text-generation model
    print("loading GPT-2 LM to GPU")
    enc, model = get_model(model_name=model_name)
    print("finish loading !")
    message_ctx = [enc.encoder['<|endoftext|>']]
    print("implication of {}".format(mode))
    # Setup for the 'bins' algorithm
    if mode == 'bins':
        bin2words, words2bin = get_bins(len(enc.encoder), block_size)
    # Setup for the 'saac' algorithm
    if delta and mode == "saac":
        nucleus = 2 ** (-1.0 * delta)
    context_tokens = encode_context(context, enc)  # LM encoding of the context

    # Extraction: recover the embedded bit stream MESSAGE_REC from covertext.
    if mode == 'arithmetic':
        message_rec = decode_arithmetic(model, enc, covertext, context_tokens,
                                        temp=temp, precision=precision,
                                        topk=topk)
    elif mode == 'huffman':
        message_rec = decode_huffman(model, enc, covertext, context_tokens,
                                     block_size)
    elif mode == 'bins':
        message_rec = decode_block(model, enc, covertext, context_tokens,
                                   block_size, bin2words, words2bin)
    elif mode == 'saac':
        message_rec = decode_saac(model, enc, covertext, context_tokens,
                                  device=device, temp=temp,
                                  precision=precision, topk=topk,
                                  nucleus=nucleus)

    # Map the recovered bit stream back to plaintext.
    if unicode_enc:
        message_rec = [bool(item) for item in message_rec]
        ba = bitarray.bitarray(message_rec)
        reconst = ba.tobytes().decode('utf-8', 'ignore')
    else:
        reconst = encode_arithmetic(model, enc, message_rec, message_ctx,
                                    precision=40, topk=60000)
        reconst = enc.decode(reconst[0])
    print("The decode text is ")
    # NOTE(review): [0:-5] assumes a trailing '<eos>' suffix; on the
    # unicode_enc path no '<eos>' exists, so this strips 5 real characters —
    # confirm intended behavior.
    print(reconst[0:-5])  # Decoded text. message_rec --arithmetic decode--> reconst
    # Case-insensitive check: does the covertext point to this name?
    extracted_name = reconst[0:-5].upper()
    if extracted_name == name.upper():
        print("YOU ARE THE ONE! (^..^)")
        return 1  # Success
    else:
        print("PITY. ('..') ")
        return 0