def FproSocket(host, port, cmd, type=None):
    '''
    @host: host IP
    @port: host port
    @cmd: command
    '''
    # Start the exchange
    try:
        sock = socket.socket()
        sock.settimeout(SOCKET['PROTIMEOUT'])
        sock.connect((host, port))
        if type:
            sock.send(encode(cmd + type))
        else:
            if host == '127.0.0.1':
                sock.send(cmd)
            else:
                sock.send(encode(cmd))
        sock.close()
    except Exception as e:
        save_log('ERROR', str(e))
        return False
    return 1
def main(): user_input = int(raw_input("Welcome \ to steganography.\nEnter 1 for encoding \ an image or 2 for decoding an image.\n")) if user_input == 1: print "Message length must have a length less than 255." encode_image = raw_input("Please enter the \ image you would like to encode your message \ into.\n") hidden_text = raw_input("Please enter the \ text file that contains the message you \ would like to send.\n") dest_image = raw_input("Please enter the \ filename you would like the encoded image \ to be called.\n") encode(encode_image, hidden_text, dest_image) user_email = int(raw_input("Would you like to send this in an email?\n\ If so, enter 1. If not, enter any other character.\n")) if user_email == 1: sendEmail(dest_image); elif user_input == 2: decode_image = raw_input("Please enter the \ image you would like to decode.\n") decode(decode_image) else: print "You have entered an invalid option."
def metropolis_hastings(mc, doc):
    # initialize randomized T
    t = gen_t()
    # consider starting fresh after k iterations
    for i in range(20000):
        #t = t if (i % 2000) != 0 else gen_t()
        #if i % 10000:
        #    new_seed = gen_t()
        #    if abs(logprob(mc, encode.encode(doc, new_seed[0], new_seed[1]))) < abs(logprob(mc, encode.encode(doc, t[0], t[1]))):
        #        t = new_seed
        if random.randrange(100) < 99:
            tp = mod_sub(copy.deepcopy(t))
        else:
            tp = mod_transl(copy.deepcopy(t))
        logprob_decoded_t = logprob(mc, encode.encode(doc, t[0], t[1]))
        logprob_decoded_tp = logprob(mc, encode.encode(doc, tp[0], tp[1]))
        # logprob is closer to zero if probability is larger
        if abs(logprob_decoded_tp) < abs(logprob_decoded_t):
            t = copy.deepcopy(tp)
        else:
            ratio = math.exp(logprob_decoded_tp - logprob_decoded_t)
            if random.random() <= ratio:
                t = copy.deepcopy(tp)
    return t
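The else branch above is the standard Metropolis acceptance rule: a proposal T' that scores worse than the current table T is still kept with probability min(1, P(T')/P(T)) = exp(logP(T') - logP(T)), which is exactly the ratio computed from the log-probability difference; otherwise T is left unchanged.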
def break_code(string, corpus, replace_table, rearrange_table, t):
    corpus_trans_prob = get_tran_prob(corpus)
    decode_string = encode.encode(string, replace_table, rearrange_table)
    prob_decode_d = caluclate_prob(corpus_trans_prob, decode_string)
    count = 0
    re_pl = {}
    re_ar = {}
    t1 = time.time()
    while time.time() - t1 < t:
        count += 1
        replace_table1, rearrange_table1 = get_updated_tables(
            replace_table, rearrange_table, corpus_trans_prob)
        decode_string1 = encode.encode(string, replace_table1, rearrange_table1)
        prob_decode_d1 = caluclate_prob(corpus_trans_prob, decode_string1)
        if prob_decode_d > prob_decode_d1:
            prob = np.random.binomial(
                1, np.exp((prob_decode_d1 - prob_decode_d) / 100))
            if prob == 1:
                replace_table, rearrange_table = copy.deepcopy(
                    replace_table1), copy.deepcopy(rearrange_table1)
                prob_decode_d = prob_decode_d1
        else:
            re_pl[prob_decode_d1] = replace_table1
            re_ar[prob_decode_d1] = rearrange_table1
            replace_table, rearrange_table = copy.deepcopy(
                replace_table1), copy.deepcopy(rearrange_table1)
            prob_decode_d = prob_decode_d1
        print(count)
    return decode_string, re_pl, re_ar
def _get_data(self): """ Call the _commit() methods of the children nodes. And return the encoded data of the node, which contains its type, and the offsets of the children nodes. """ data = {} if self.bucket != None: for (key, value) in self.bucket.items(): data[key] = value._commit() if self.rest != None: rest_data = self.rest._commit() # print("Node committed: " + str(self)+ " bucketsize: " + # str(len(self.bucket))) # print("Node data: " + # str({"type":"Node", "rest":rest_data, "entries":data})) return add_integrity(encode({"type":"Node", "rest":rest_data, "entries":data})) # print("Node committed: " + str(self)+ " bucketsize: " + # str(len(self.bucket))) # print("Node data: "+ str({"type":"Node", "entries":data})) return add_integrity(encode({"type":"Node", "entries":data}))
def break_the_code(string, corpus): #Generate initial replace and rearrangement tables letters = list(range(ord('a'), ord('z') + 1)) random.shuffle(letters) replace_table = dict( zip(map(chr, range(ord('a'), ord('z') + 1)), map(chr, letters))) rearrange_table = list(range(0, 4)) random.shuffle(rearrange_table) modified = 0 cnt = 0 end_encryption = [] corpus_W0, corpus_W1 = generate_probs(corpus) best_score = score_my_file( corpus_W0, corpus_W1, encode.encode(string, replace_table, rearrange_table)) # i = 0 while True: # i+=1 if np.random.uniform() > 0.5: modified = 1 rearrange_table_backup = deepcopy(rearrange_table) rearrange_table = deepcopy(generate_rearrangement(rearrange_table)) else: modified = 0 replace_table_backup = deepcopy(replace_table) replace_table = deepcopy(generate_replacement(replace_table)) score = score_my_file( corpus_W0, corpus_W1, encode.encode(string, replace_table, rearrange_table)) if score > best_score: cnt += 1 best_score = score #Replace the best_score with the new score else: #If not, replace with probability P(D')/P(D) if np.random.binomial(1, np.exp(score - best_score)) == 0: if modified == 1: rearrange_table = deepcopy( rearrange_table_backup) #Revert and not replace else: replace_table = deepcopy( replace_table_backup) #Revert and not replace else: cnt += 1 best_score = score #Replace # print('Iter:',i,'Count:',cnt) end_encryption.append(cnt) #If there have been at least 20k iterations and the last 1000 entries have not changed then abort if len(end_encryption) > 20000: all_same = end_encryption[len(end_encryption) - 1000:len(end_encryption)] if all(x == all_same[0] for x in all_same): break final_decryption = encode.encode(string, replace_table, rearrange_table) return final_decryption
def agreement(P, config={}):
    '''
    P: number of pilots
    config: key-generation options, including:
        sampling_period: sampling period
        sampling_time: sampling time
        corr_ab: correlation coefficient between Alice's and Bob's channel measurements
        corr_ae: correlation coefficient between Alice's and Eve's channel measurements
        block_size: number of samples per block for dual-threshold quantization
        coef: quantization coefficient for dual-threshold quantization
        qtype: encoding used by uniform quantization (gray/natural)
        order: quantization order for uniform quantization
        mtype: merge type (RSSI/Phase/cross/and/or/xor)
        iteration: number of information-reconciliation iterations
        m: number of Hamming-code parity bits
    '''
    ''' Sampling parameters '''
    sampling_period = config.get('sampling_period', 1)
    sampling_time = config.get('sampling_time', 3)
    corr_ab = config.get('corr_ab', 0.9)
    corr_ae = config.get('corr_ae', 0.4)
    ''' Quantization parameters '''
    block_size = config.get('block_size', 25)
    coef = config.get('coef', 0.8)
    qtype = config.get('qtype', 'gray')
    order = config.get('order', 1)
    mtype = config.get('mtype', 'cross')
    ''' Information-reconciliation parameters '''
    iteration = config.get('iteration', 2)
    m = config.get('m', 3)

    ''' Sampling '''
    rssi_A, rssi_B, rssi_E = sampling_RSSI(sampling_period, sampling_time, corr_ab, corr_ae)
    phase_A, phase_B, phase_E = sampling_phase(sampling_period, sampling_time, corr_ab, corr_ae)

    ''' RSSI quantization '''
    bits_A_rssi, drop_list_A = quantize_ASBG_1bit(rssi_A, block_size, coef)
    bits_B_rssi, drop_list_B = quantize_ASBG_1bit(rssi_B, block_size, coef)
    bits_E_rssi, drop_list_E = quantize_ASBG_1bit(rssi_E, block_size, coef)
    bits_A_rssi = remain(bits_A_rssi, drop_list_B)
    bits_B_rssi = remain(bits_B_rssi, drop_list_A)
    bits_E_rssi = remain(bits_E_rssi, drop_list_A)

    ''' Phase quantization '''
    bits_A_phase = quantize_phase(phase_A, qtype, order)
    bits_B_phase = quantize_phase(phase_B, qtype, order)
    bits_E_phase = quantize_phase(phase_E, qtype, order)

    ''' Merge '''
    bits_A = merge(bits_A_rssi, bits_A_phase, mtype)
    bits_B = merge(bits_B_rssi, bits_B_phase, mtype)
    bits_E = merge(bits_E_rssi, bits_E_phase, mtype)

    ''' Information reconciliation '''
    bits_A, bits_B = winnow(bits_A, bits_B, iteration, m)

    ''' Generate pilots '''
    pos_A = encode(bits_A, P)
    pos_B = encode(bits_B, P)
    pos_E = encode(bits_E, P)

    return pos_A, pos_B, pos_E
def commit(self):
    with open(self.filename, 'ba') as db:
        self.root._commit(db)
        pos = db.tell()
        db.write(encode(self))
        footer = {'type': 'footer', 'tree': pos}
        db.write(encode(footer))
def main():
    # input flags for this program
    parser = OptionParser()
    parser.add_option("-m", "--message", action="store_true", dest="encode_message",
                      default=True, help="encode a message from the command line")
    parser.add_option("-e", "--encode", dest="encode_file",
                      help="text file with message to encode and send")
    parser.add_option("-o", "--output", dest="output_file",
                      help="output file with encoded audio")
    parser.add_option("-d", "--decode", dest="decode_file",
                      help="wav file to decode")
    parser.add_option("-p", "--play", action="store_true", dest="play_audio",
                      default=False, help="play audio after encoding text message")
    parser.add_option("-l", "--listen", action="store_true", dest="listen",
                      default=False, help="listen for audio signal and decode")
    options, args = parser.parse_args()

    if options.encode_file:
        output_file = options.output_file if options.output_file else 'output.wav'
        encode(options.encode_file, output_file)
        if options.play_audio:
            play_audio(output_file)
    elif options.listen:
        return listen()
    elif options.decode_file:
        decode(options.decode_file)
    elif options.encode_message:
        message = raw_input("Type your message: ")
        output_file = "output.wav"
        text_file = open("temp.txt", "w")
        text_file.write(message)
        text_file.close()
        encode("temp.txt", output_file)
        print "* * done! Your audio file is available at 'output.wav'."
        if options.play_audio:
            play_audio(output_file)
    else:
        parser.print_help()
    return
def agreement(P, mtype='cross', iteration=2, corr_ab=0.9, corr_ae=0.4):
    '''
    P: number of pilots
    mtype: merge type (RSSI/Phase/cross/and/or/xor)
    iteration: number of winnow iterations
    corr_ab: correlation coefficient between Alice's and Bob's channel measurements
    corr_ae: correlation coefficient between Alice's and Eve's channel measurements
    '''
    ''' Sampling parameters '''
    sampling_period = 1
    sampling_time = 3
    ''' Quantization parameters '''
    block_size = 25
    coef = 0.8
    qtype = 'gray'
    order = 1

    ''' Sampling '''
    rssi_A, rssi_B, rssi_E = sampling('RSSI', sampling_period, sampling_time, corr_ab, corr_ae)
    phase_A, phase_B, phase_E = mod(sampling('Phase', sampling_period, sampling_time, corr_ab, corr_ae), 2 * pi)
    #print 'corrcoef of rssi between AB and AE:', corrcoef(rssi_A, rssi_B, rowvar=0)[0, 1], corrcoef(rssi_A, rssi_E, rowvar=0)[0, 1]
    #print 'corrcoef of phase between AB and AE:', corrcoef(phase_A, phase_B, rowvar=0)[0, 1], corrcoef(phase_A, phase_E, rowvar=0)[0, 1]

    ''' RSSI quantization '''
    bits_A_rssi, drop_list_A = quantization_thre(rssi_A, block_size, coef)
    bits_B_rssi, drop_list_B = quantization_thre(rssi_B, block_size, coef)
    bits_E_rssi, drop_list_E = quantization_thre(rssi_E, block_size, coef)
    bits_A_rssi = remain(bits_A_rssi, drop_list_A, drop_list_B)
    bits_B_rssi = remain(bits_B_rssi, drop_list_A, drop_list_B)
    bits_E_rssi = remain(bits_E_rssi, drop_list_A, drop_list_E)
    #print 'BMR of RSSI before winnow between AB', BMR(bits_A_rssi, bits_B_rssi)

    ''' Phase quantization '''
    bits_A_phase = quantization_even('Phase', phase_A, size(phase_A), qtype, order)
    bits_B_phase = quantization_even('Phase', phase_B, size(phase_B), qtype, order)
    bits_E_phase = quantization_even('Phase', phase_E, size(phase_E), qtype, order)
    #print 'BMR of phase before winnow between AB', BMR(bits_A_phase, bits_B_phase)

    ''' Merge '''
    bits_A = merge(bits_A_rssi, bits_A_phase, mtype)
    bits_B = merge(bits_B_rssi, bits_B_phase, mtype)
    bits_E = merge(bits_E_rssi, bits_E_phase, mtype)
    #print 'BMR of merge before winnow between AB', BMR(bits_A, bits_B)

    ''' Winnow information reconciliation '''
    bits_A, bits_B = winnow(bits_A, bits_B, iteration)
    #print 'BMR of merge after winnow between AB', BMR(bits_A, bits_B)

    ''' Generate pilots '''
    pos_A = encode(bits_A, P)
    pos_B = encode(bits_B, P)
    pos_E = encode(bits_E, P)

    return pos_A, pos_B, pos_E
def go():
    test = input("Press 1 for encoding and 2 for decoding, then press Enter. ")
    if test == "1":
        encode()
    elif test == "2":
        decode()
    else:
        print("Please type 1 or 2. Try again.")
        go()
def proSocket(host, port, cmd, type=None):
    '''
    @host: host IP
    @port: host port
    @cmd: command
    '''
    data = ''
    d = ''
    try:
        sock = socket.socket()
        sock.settimeout(SOCKET['PROTIMEOUT'])
        sock.connect((host, port))
        print encode(cmd)
        if type:
            sock.send(encode(cmd + type))
        else:
            sock.send(encode(cmd))
        while True:
            d = sock.recv(SOCKET['BUFSIZE'])
            data = data + d
            time.sleep(SOCKET['DELAY'])
            if not d:
                break
        try:
            data = decode(data)
        except Exception as e:
            log_error('decode error:' + str(e) + ' Try increasing the delay.')
            # Retry once with a longer delay between reads
            sock.send(encode(cmd))
            data = ''
            d = ''
            while True:
                d = sock.recv(SOCKET['BUFSIZE'])
                data = data + d
                time.sleep(SOCKET['DELAY'] + 1)
                if not d:
                    break
            try:
                data = decode(data)
            except Exception as e:
                log_error('recv decode error:' + str(e))
                return False
    except Exception as e:
        log_error('ip: ' + host + ' , port: ' + str(port) + ' , proSocket other error: ' + str(e))
        sock.close()
        return False
    return data
def main(): """ Execute a full transmission of a text file through an Additive Gaussian Noise channel. """ symbols = read_message(MESSAGE_FILE) encode(symbols) os.system('python3 ../client/client.py --input_file=' + INPUT_FILE + ' --output_file=' + OUTPUT_FILE + ' --srv_hostname=iscsrv72.epfl.ch --srv_port=80') recovered_symbols = decode() write_guess(recovered_symbols, GUESS_FILE)
def embedB(imgB, dataList, Aindex):
    matrix_input = np.asarray(imgB)
    matrix_input = matrix_input.astype(int)
    embedRound = 1
    matrix_output, nextInfoList = encode.encode(matrix_input, dataList, Aindex, embedRound)
    while len(nextInfoList) != 0:
        embedRound += 1
        print('embedRound: ', embedRound)
        matrix_output, nextInfoList = encode.encode(matrix_output, nextInfoList, Aindex, embedRound)
    #matrix_output = encode(imgB, dataList)
    return matrix_output
def break_code(string, corpus): transition = transition_probability_distribution(corpus) initial = initial_probability_distribuition(corpus) no_of_iterations = 20000 # how many times sampling will be done to get better decrypted document old_replace_table, old_rearrange_table = modify_encryption( ) #Stores initial encryption tables T_guess = encode.encode(string, old_replace_table, old_rearrange_table) #encoding T_prob = log_probability_of_document( T_guess.split(" "), transition, initial) # returns prob for guess with given encryption new_replace_table = copy.deepcopy( old_replace_table) #depcopy used to copy table in new table new_rearrange_table = copy.deepcopy(old_rearrange_table) while (no_of_iterations != 0): old_rearrange_table = copy.deepcopy(new_rearrange_table) old_replace_table = copy.deepcopy(new_replace_table) flag = random.randint( 0, 1 ) #generate random number among 0 and 1 to modify one of the two encryption tables if (flag == 0): new_replace_table = modify_replace(old_replace_table) else: new_rearrange_table = modify_rearrangement(old_rearrange_table) T_hat = encode.encode( string, new_replace_table, new_rearrange_table) #encoding with modified encryption tables T_hat_prob = log_probability_of_document( T_hat.split(" "), transition, initial) # returns prob for guess with given encryption if (T_hat_prob > T_prob): # new guess is better than old T_prob = T_hat_prob else: # new guess is not better than old if (np.random.binomial(1, np.exp(T_hat_prob - T_prob)) == 0): if flag == 1: #changing one of the two encryption tables new_rearrange_table = copy.deepcopy(old_rearrange_table) else: new_replace_table = copy.deepcopy(old_replace_table) else: T_prob = T_hat_prob no_of_iterations -= 1 # print(math.exp(T_hat_prob),math.exp(T_prob)) return encode.encode(string, new_replace_table, new_rearrange_table)
def FproSocket_modKey(host, port, cmd, type=None):
    try:
        sock = socket.socket()
        #sock.settimeout(SOCKET['PROTIMEOUT'])
        sock.connect((host, port))
        if type:
            sock.send(encode(cmd + type, COMMANDS['_MOD_KEY']))
        else:
            sock.send(encode(cmd, COMMANDS['_MOD_KEY']))
        sock.close()
    except Exception as e:
        log_error('FproSocket error:' + str(e))
        return 0
    return 1
def main(args: List[str]):
    if args[0] not in ["encode", "decode", "dencode", "ddecode"]:
        raise ValueError(
            "Operation needs to be encode, decode, dencode, or ddecode.")
    if args[0] == "encode":
        print(" ".join(str(x) for x in encode(" ".join(args[1:]))))
    elif args[0] == "decode":
        print(decode([int(x) for x in args[1:]]))
    elif args[0] == "dencode":
        print(discordify(encode(" ".join(args[1:]))))
    elif args[0] == "ddecode":
        print(decode(undiscordify(" ".join(args[1:]))))
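A minimal entry-point sketch for the dispatcher above, assuming the module is run directly as a script; it simply forwards the command-line arguments.

if __name__ == "__main__":
    import sys
    # e.g. `python cli.py encode some secret text`
    main(sys.argv[1:])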
def test_isEncoderFileCreated(self):
    path = str(pathlib.Path(__file__).resolve().parents[2])
    path = path + "/src/Facial recognition/dataset"
    print(path)
    encode_obj = encode()
    is_file_created = encode_obj.run(path)
    self.assertTrue(is_file_created)
def __init__(self):
    '''
    @summary: initialize
    @result: None
    '''
    super(decode, self).__init__()
    self.encode = encode.encode()
def main():
    # Message to encode
    message = str(input("Enter message to encode: "))

    # Create path
    directory = "Pics"
    file_name = "test.png"
    path = os.path.join(directory, file_name)

    # Load image
    image = cv2.imread(path, 1)

    # Show before
    cv2.imshow("before", image)

    # Encode message inside the image
    encoded_image = encode(image, message)

    # Save encoded image
    directory = "Pics"
    file_name = message.split(" ")[0] + ".png"
    path = os.path.join(directory, file_name)
    cv2.imwrite(path, encoded_image)

    # Show result
    cv2.imshow("after", encoded_image)

    # Decode image and receive message
    recovered_message = decode(encoded_image)
    print("Message should be the same as input: ", recovered_message)

    cv2.waitKey(0)
def send_messasge(src_host, host, port, covert_msg, plaintext_msg):
    plaintext_msg += udp.EOF
    encoded_input = encode.encode(covert_msg)
    #debug
    print "encoded input: " + str(encoded_input)
    packet_sender = udp.UDP(host, port)
    msg_length = len(plaintext_msg)
    msg_segment_size = msg_length / len(encoded_input) + 1
    min = 0
    max = int(msg_length / msg_segment_size) + 1
    for x in range(min, max):
        # determine what part of the fake message to send in this packet
        start = int(x * msg_segment_size)
        end = int(start + msg_segment_size)
        if end > msg_length:
            end = msg_length
        # determine what should be sent for the covert message
        covert_msg_segment = 0
        if x < len(encoded_input):
            covert_msg_segment = encoded_input[x]
        else:
            covert_msg_segment = random.randint(SOURCE_PORT_MIN_INT, SOURCE_PORT_MAX_INT)
        packet_sender.send(src_host, covert_msg_segment, plaintext_msg[start:end])
def start_training():
    alice_key, bob_key = dh_exchange()
    print('Key has been chosen !', alice_key, bob_key)
    np.random.seed(alice_key)

    from auto_encoder import train
    from encode import encode, decode

    print('Starting training ...')
    train(Callback2(), set_auto_encoder)

    alice_sentence = "bonjour bob 123456"
    encrypted_sentence = encode(alice_sentence)
    decrypted_sentence = decode(encrypted_sentence)

    print('Original sentence was', alice_sentence)
    print('Encrypted sentence was', np.array(encrypted_sentence).reshape((18, 10)))
    img = Image.fromarray(
        np.array(encrypted_sentence).reshape((18, 10)) * 255 /
        np.max(np.max(np.array(encrypted_sentence).reshape((18, 10)))))
    img.show()
    print('Decrypted sentence is', decrypted_sentence)
def write(self, data):
    f = open(self.filename, "ba")
    offset = f.tell()
    f.write(add_integrity(encode(data)))
    f.close()
    return offset
def _commit(self, db):
    for n in self.bucket.values():
        n._commit(db)
    pos = db.tell()
    db.write(encode(self))
    return pos
def senderResponse(s):
    global message
    if s == "0":
        makeSignal(message + encode(message), bitTime)
        return False
    else:
        return True
def write_document(tofile, data):
    f = open(tofile, "ba")
    offset = f.tell()
    # print("offset: ", str(offset))
    f.write(add_integrity(encode(data)))
    f.close()
    return offset
def t_render(ss, ex, exact=True):
    import tokenize, parse, encode
    if not istype(ss, 'list'):
        ss = [ss]
    n = 1
    for s in ss:
        fname = 'tmp' + str(n) + '.tpc'
        system_rm(fname)
        tokens = tokenize.tokenize(s)
        t = parse.parse(s, tokens)
        r = encode.encode(fname, s, t)
        f = save(fname, r)
        n += 1
    system_rm('tmp.txt')
    cmd = VM + fname + " > tmp.txt"
    system(cmd)
    res = load(TMP).strip()
    #print(ss, ex, res)
    if exact:
        if res != ex:
            showerror(cmd, ss, ex, res)
        assert (res == ex)
    else:
        if ex not in res:
            showerror(cmd, ss, ex, res)
        assert (ex in res)
def main():
    ss = Preferences()
    script1sourcedir = ss.getwpath("script1sourcedir")  # ("seeding\")
    # gives absolute paths + names
    files = [os.path.join(script1sourcedir, filename)
             for filename in next(os.walk(script1sourcedir))[2]]
    currentfile = 0
    container = []  # set up an empty container for desired data to get put into for later
    for eachfile in files:
        metainfo = decoder.decode_from_file(eachfile)
        # need to manually SHA1 hash the torrent file's info-dict to get the info-hash
        infodict = metainfo[b'info']
        info_hash = hashlib.sha1(encode.encode(infodict)).hexdigest().upper()
        internalname = infodict[b'name']
        torrentfilename = eachfile[eachfile.rfind("\\") + 1:]
        locextension = torrentfilename.find(".torrent")  # location of extension (char position)
        locid = torrentfilename.rfind("-") + 1  # location of torrentID (char position)
        torrentid = torrentfilename[locid:locextension]  # grab torrentID
        container.append([torrentfilename, internalname, info_hash, torrentid])
        currentfile += 1
        # console output is ascii only, cannot print unicode - chars are omitted
        print(currentfile, torrentfilename.encode('ascii', errors='ignore').decode())

    # WRITE FILE 1
    # write out a text file with torrentID and hash on one line ("1seeding_ID+Hash+Filename.txt")
    writelistfile = codecs.open(ss.getwpath("outpath1"), 'wb', "utf-8")
    for eachline in container:
        # output torrentID / Hash / torrentfilename
        writelistfile.write(eachline[3] + " / " + eachline[2] + " / " + eachline[0] + "\n")
    writelistfile.close()
def ckcode(e, r):
    c = encrypt(e + r)
    t = encrypt(e) + encode(encrypt(r))
    p = gen(t, c)
    u = encrypt(c)
    y = gen(p + gen(e, t) + u, t)
    return pricd(e, r, y)
def encrypt():
    # Call encode() if the output directory already exists;
    # otherwise create it first, then encode.
    enc_dir = "E:\\PROJ.RC\\python\\pyBot\\enc_files"
    if os.path.exists(enc_dir):
        encode()
    else:
        loop()
        print(" Directory does not exist")
        os.makedirs(enc_dir)
        loop()
        print(" Created new file")
        encode()
    menu()
def run(self): print("Model LatentGAN running, encoding training set") encode(smiles_file=self.smiles_file, output_smiles_file_path=self.output_latent, encoder=self.encoder) print("Encoding finished finished. Creating model files") C = CreateModelRunner(input_data_path=self.output_latent, output_model_folder=self.storage_path) C.run() print("Model Created. Training model") T = TrainModelRunner(input_data_path=self.output_latent, output_model_folder=self.storage_path, decode_mols_save_path=self.decoded_smiles, n_epochs=self.n_epochs, sample_after_training=self.sample_size) T.run() print("Model finished.")
def ck(e, t, p, u, y, o):
    f = encrypt(e)
    g = encrypt(u + y)
    a = i = encrypt(e) + encode(encrypt(t)) + p
    b = gen_s(i, a)
    v = encrypt(f) + g
    j = gen_s(b + gen_s(e, a) + v, g)
    return prijm(e, t, p, u, y, o, j)
def test_gripper01_extracted_opgraph(self):
    file_dir = os.path.dirname(__file__)  # <-- absolute dir the script is in
    original_sas = SAS3Extended.from_file(os.path.join(file_dir, 'test_cases/gripper01.sas'))
    encoded_sas = SAS3Extended.from_file(os.path.join(file_dir, 'test_cases/gripper01_extracted.sas'))
    candidates = extract_tau_operators_opgraph(original_sas)
    if len(candidates) > 0:
        original_sas = encode(original_sas, candidates)
    self.assertEqual(str(original_sas), str(encoded_sas))
def test_miconic_extracted_top_down(self):
    file_dir = os.path.dirname(__file__)  # <-- absolute dir the script is in
    original_sas = SAS3Extended.from_file(os.path.join(file_dir, 'test_cases/miconic.sas'))
    expected_sas = SAS3Extended.from_file(os.path.join(file_dir, 'test_cases/miconic_extracted.sas'))
    candidates = extract_tau_operators_top(original_sas)
    if len(candidates) > 0:
        encoded_sas = encode(original_sas, candidates)
    self.assertEqual(str(encoded_sas), str(expected_sas))
def send(data):
    sock = serial.Serial(target)
    time.sleep(0.1)
    e = encode(data)
    chunk = 40
    # write the encoded payload in 40-byte chunks, pausing between writes
    for s in (e[i:i + chunk] for i in range(0, len(e), chunk)):
        sock.write(s)
        time.sleep(0.1)
    sock.close()
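A short usage sketch for the chunked serial sender above; `target` (the device path) and `encode()` are assumed to come from the surrounding module, and the payload is an arbitrary example.

if __name__ == '__main__':
    # assumes `target` points at a real serial device, e.g. '/dev/ttyUSB0'
    send('hello world')  # payload is encoded, then written in 40-byte chunks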
def test_sokoban01_extracted_top_down(self):
    file_dir = os.path.dirname(__file__)  # <-- absolute dir the script is in
    original_sas = SAS3Extended.from_file(os.path.join(file_dir, 'test_cases/sokoban01_essential.sas'))
    expected_sas = SAS3Extended.from_file(os.path.join(file_dir, 'test_cases/sokoban01_extracted.sas'))
    candidates = extract_tau_operators_top(original_sas)
    if len(candidates) > 0:
        encoded_sas = encode(original_sas, candidates)
        normalize(encoded_sas)
    self.assertMultiLineEqual(str(encoded_sas), str(expected_sas))
def encodeAllFaces(imgs):
    faces = []
    for img in imgs:
        frame = cv2.imread(img)
        encoding = encode(frame)
        faces.extend(encoding)
        print("-- Processed {}: {} --".format(img, len(encoding)))
    # faces = [encode(cv2.imread(img)) for img in imgs]
    return np.array(faces)
def _commit(self): """ Commit the changes. Calling the _commit() method of the root node, and writing its offset in the footer at the end of the file. """ offset = self.root._commit() f = open(self.filename, "ba") f.write(add_integrity(encode({"root_offset":offset, "max_size":self.max_size}))) f.close()
def write_piece(ty, node, out):
    lazy_parts = lazy.LazyMemberExtractor(types)
    node = lazy_parts.replace(ty, node)
    encode.encode(types, m, out, ty, node)

    # Encode the lazy parts in memory
    lazy_encoded = []
    for _, attr, part in lazy_parts.lazies:
        buf = io.BytesIO()
        lazy_encoded.append(buf)
        write_piece(attr.resolved_ty, part, buf)

    # Write the dictionary of lazy parts, then the lazy parts
    bits.write_varint(out, len(lazy_encoded))
    for encoded_part in lazy_encoded:
        bits.write_varint(out, encoded_part.tell())
    for encoded_part in lazy_encoded:
        out.write(encoded_part.getbuffer())
def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list): logger.info("Starting post-processing for: %s - %s" % (release["ArtistName"], release["AlbumTitle"])) # start enconding if headphones.ENCODE: downloaded_track_list = encode.encode(albumpath) if headphones.EMBED_ALBUM_ART or headphones.ADD_ALBUM_ART: album_art_path = albumart.getAlbumArt(albumid) artwork = urllib.urlopen(album_art_path).read() if headphones.EMBED_ALBUM_ART: embedAlbumArt(artwork, downloaded_track_list) if headphones.CLEANUP_FILES: cleanupFiles(albumpath) if headphones.ADD_ALBUM_ART: addAlbumArt(artwork, albumpath) if headphones.CORRECT_METADATA: correctMetadata(albumid, release, downloaded_track_list) if headphones.EMBED_LYRICS: embedLyrics(downloaded_track_list) if headphones.RENAME_FILES: renameFiles(albumpath, downloaded_track_list, release) if headphones.MOVE_FILES and headphones.DESTINATION_DIR: albumpath = moveFiles(albumpath, release, tracks) if headphones.MOVE_FILES and not headphones.DESTINATION_DIR: logger.error( 'No DESTINATION_DIR has been set. Set "Destination Directory" to the parent directory you want to move the files to' ) pass myDB = db.DBConnection() # There's gotta be a better way to update the have tracks - sqlite trackcount = myDB.select("SELECT HaveTracks from artists WHERE ArtistID=?", [release["ArtistID"]]) if not trackcount[0][0]: cur_track_count = 0 else: cur_track_count = trackcount[0][0] new_track_count = cur_track_count + len(downloaded_track_list) myDB.action("UPDATE artists SET HaveTracks=? WHERE ArtistID=?", [new_track_count, release["ArtistID"]]) myDB.action('UPDATE albums SET status = "Downloaded" WHERE AlbumID=?', [albumid]) myDB.action('UPDATE snatched SET status = "Processed" WHERE AlbumID=?', [albumid]) updateHave(albumpath) logger.info("Post-processing for %s - %s complete" % (release["ArtistName"], release["AlbumTitle"]))
def t_unrender(s):
    import tokenize, parse, encode
    ok = False
    try:
        tokens = tokenize.tokenize(s)
        t = parse.parse(s, tokens)
        r = encode.encode('tmp.tpc', s, t)
    except:
        ok = True
    assert (ok == True)
def _get_data(self): """ Returns the encoded data of the leaf node, containing its type, and the key/value pairs. These values will eventually be the offsets of the documents. """ # print("Leaf committed: " + str(self) + " bucketsize: " + # str(len(self.bucket))) data = {"type":"Leaf", "entries":self.bucket} # print("Leaf data: "+ str(data)) return(add_integrity(encode(data)))
def socketSend(host, port, cmd, type=None):
    '''
    @host: host IP
    @port: host port
    @cmd: command
    '''
    if PortIsAlive(host, port, count=SOCKET['COUNT']) == False:
        return False
    data = d = ''
    try:
        sock = socket.socket()
        # settimeout caused many "time out" exceptions here;
        # see: http://www.douban.com/note/174791641/
        #sock.settimeout(SOCKET['PROTIMEOUT'])
        sock.connect((host, port))
        if type:
            sock.send(encode(cmd + type))
        else:
            sock.send(encode(cmd))
        while True:
            d = sock.recv(SOCKET['BUFSIZE'])
            data = data + d
            time.sleep(0.003)
            if not d:
                sock.close()
                return data
    except Exception as e:
        save_log('ERROR', 'ip: ' + host + ' , port: ' + str(port) + ' , socketSend error: ' + str(e))
        sock.close()
        return False
def oldproSocket(host, port, cmd, type=None):
    '''
    @host: host IP
    @port: host port
    @cmd: command
    '''
    # Result returned after the command is executed
    data = ''
    # Start the exchange
    try:
        sock = socket.socket()
        sock.settimeout(SOCKET['PROTIMEOUT'])
        sock.connect((host, port))
        if type:
            sock.send(encode(cmd + type))
        else:
            sock.send(encode(cmd))
        data = decode(sock.recv(SOCKET['BUFSIZE']))
        sock.close()
    except socket.timeout:
        sock.send(encode('Time out!'))
        save_log('WARNING', 'host:' + host + ' Time out!')
        sock.close()
        return data
    except socket.error as args:
        (error_no, msg) = args
        error_log = 'Connect server failed:%s, error_no=%d ,error_host=%s' % (msg, error_no, host)
        save_log('ERROR', error_log)
        sock.close()
        return data
def FproSocket(host, port, cmd, type=None):
    '''
    @host: host IP
    @port: host port
    @cmd: command
    '''
    try:
        sock = socket.socket()
        sock.settimeout(SOCKET['PROTIMEOUT'])
        sock.connect((host, port))
        if type:
            sock.send(encode(cmd + type))
        else:
            sock.send(encode(cmd))
        sock.close()
    except Exception as e:
        log_error('FproSocket error:' + str(e))
        return 0
    return 1
def main(self, url, ie_key, subtitles, filename, filedesc, convertkey, username, oauth): outputdir = generate_dir() s = stats() def statuscallback(text, percent): if text is not None: s.text = text if percent is not None: s.percent = percent print '%d: %s' % (s.percent, s.text) self.update_state(state='PROGRESS', meta={'text': s.text, 'percent': s.percent}) def errorcallback(text): raise TaskError(text) statuscallback('Downloading...', -1) d = download.download(url, ie_key, 'bestvideo+bestaudio/best', subtitles, outputdir, statuscallback, errorcallback) if not d: errorcallback('Download failed!') file = d['target'] if not file: errorcallback('Download failed!') subtitles = subtitles and d['subtitles'] statuscallback('Converting...', -1) file = encode.encode(file, convertkey, statuscallback, errorcallback) if not file: errorcallback('Convert failed!') ext = file.split('.')[-1] statuscallback('Configuring Pywikibot...', -1) import pywikibot pywikibot.config.authenticate['commons.wikimedia.org'] = oauth pywikibot.config.usernames['commons']['commons'] = username pywikibot.Site('commons', 'commons', user=username).login() statuscallback('Uploading...', -1) fileurl = 'http://v2c.wmflabs.org/' + '/'.join(file.split('/')[3:]) filename += '.' + ext filename, wikifileurl = upload.upload(file, filename, url, fileurl, filedesc, username, statuscallback, errorcallback) if not wikifileurl: errorcallback('Upload failed!') if subtitles: statuscallback('Uploading subtitles...', -1) try: subtitleuploader.subtitles(subtitles, filename, username, statuscallback, errorcallback) except Exception, e: statuscallback(type(e).__name__ + ": " + str(e), None) print e pass
def addClicked(self, **kwargs):
    fileName = ".data"
    # Writing encoded data to file
    if kwargs['password'].get() != "" and kwargs['service'].get() != "":
        data = None
        details = [kwargs['username'].get(), encode.encode(kwargs['password'].get())]
        # Reading initially present data
        try:
            with open(fileName, "r") as outfile:
                data = outfile.read()
        except IOError:
            # Create a file if it doesn't exist
            open(fileName, "a").close()
        # Loading new data
        if data:
            data = json.loads(data)
            data[kwargs['service'].get()] = details
        else:
            data = {}
            data[kwargs['service'].get()] = details
        # Writing back the data
        with open(".data", "w") as outfile:
            outfile.write(json.dumps(data, sort_keys=True, indent=4))
        # Clear the contents of the Entry widgets
        for widg in ('username', 'service', 'password'):
            kwargs[widg].delete(0, 'end')
        kwargs['info'].config(text="Added!!")
    else:
        kwargs['info'].config(text="Service or Password can't be empty!!")
def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list): logger.info('Starting post-processing for: %s - %s' % (release['ArtistName'], release['AlbumTitle'])) #start enconding if headphones.ENCODE: downloaded_track_list=encode.encode(albumpath) album_art_path = albumart.getAlbumArt(albumid) if headphones.EMBED_ALBUM_ART or headphones.ADD_ALBUM_ART: artwork = urllib.urlopen(album_art_path).read() if len(artwork) < 100: artwork = False logger.info("No suitable album art found. Not adding album art") if headphones.EMBED_ALBUM_ART and artwork: embedAlbumArt(artwork, downloaded_track_list) if headphones.CLEANUP_FILES: cleanupFiles(albumpath) if headphones.ADD_ALBUM_ART and artwork: addAlbumArt(artwork, albumpath) if headphones.CORRECT_METADATA: correctMetadata(albumid, release, downloaded_track_list) if headphones.EMBED_LYRICS: embedLyrics(downloaded_track_list) if headphones.RENAME_FILES: renameFiles(albumpath, downloaded_track_list, release) if headphones.MOVE_FILES and headphones.DESTINATION_DIR: albumpath = moveFiles(albumpath, release, tracks) if headphones.MOVE_FILES and not headphones.DESTINATION_DIR: logger.error('No DESTINATION_DIR has been set. Set "Destination Directory" to the parent directory you want to move the files to') pass myDB = db.DBConnection() # There's gotta be a better way to update the have tracks - sqlite trackcount = myDB.select('SELECT HaveTracks from artists WHERE ArtistID=?', [release['ArtistID']]) if not trackcount[0][0]: cur_track_count = 0 else: cur_track_count = trackcount[0][0] new_track_count = cur_track_count + len(downloaded_track_list) myDB.action('UPDATE artists SET HaveTracks=? WHERE ArtistID=?', [new_track_count, release['ArtistID']]) myDB.action('UPDATE albums SET status = "Downloaded" WHERE AlbumID=?', [albumid]) myDB.action('UPDATE snatched SET status = "Processed" WHERE AlbumID=?', [albumid]) updateHave(albumpath) logger.info('Post-processing for %s - %s complete' % (release['ArtistName'], release['AlbumTitle'])) if headphones.PROWL_ONSNATCH: pushmessage = release['ArtistName'] + ' - ' + release['AlbumTitle'] logger.info(u"Prowl request") prowl = notifiers.PROWL() prowl.notify(pushmessage,"Download and Postprocessing completed") if headphones.XBMC_ENABLED: xbmc = notifiers.XBMC() if headphones.XBMC_UPDATE: xbmc.update() if headphones.XBMC_NOTIFY: xbmc.notify(release['ArtistName'], release['AlbumTitle'], album_art_path)
def _compile(s, fname):
    tokens = tokenize.tokenize(s)
    t = parse.parse(s, tokens)
    r = encode.encode(fname, s, t)
    return r
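A brief usage sketch of the tokenize -> parse -> encode pipeline wrapped by _compile(), mirroring how t_render() drives the same steps; the source string and filename are arbitrary examples, and save() is the helper used elsewhere in this file.

src = "print('hello')"                # arbitrary example program
bytecode = _compile(src, 'tmp.tpc')   # tokenize, parse, then encode
save('tmp.tpc', bytecode)             # same save() helper t_render uses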
    sys.exit(0)

rawdata = sys.argv[1:-1]
ecl = sys.argv[-1]
data = ' '.join(rawdata)

# step 1. data analysis
if set(data).issubset(set(numeric)):
    mode = NUMERIC
elif set(data).issubset(set(alphanumeric)):
    mode = ALPHANUMERIC
elif set(data).issubset(set(eightbitbyte)):
    mode = EIGHTBITBYTE
print "mode determined... is", mode

version = utils.getversion(data, mode)
print "version determined... is", version

print "generating codewords now... please wait"
codewords = encode.encode(data, version, mode, ecl)

print "generating finalmessage now... please wait"
finalmessage = errorcode.genfinalmessage(codewords, version, ecl)

print "generating matrix now... please wait"
fmatrix = matrix.getmatrix(version, finalmessage, ecl)

print "draw the qrcode now... please wait"
draw.drawmatrix(fmatrix)
print "..."
print "done!"
def send(data):
    sock.send(encode(data))
def go(nucleotides_per_input=8000000, gzip_output=True, gzip_level=3, to_stdout=False, push='.', mover=filemover.FileMover(), verbose=False, scratch=None, bin_qualities=True, short_qnames=False, skip_bad_records=False, workspace_dir=None, fastq_dump_exe='fastq-dump', ignore_missing_sra_samples=False): """ Runs Rail-RNA-preprocess Input (read from stdin) ---------------------------- Tab-separated fields: ---If URL is local: 1. #!splitload 2. \x1d-separated list of 0-based indexes of reads at which to start each new file 3. \x1d-separated list of numbers of reads to include in gzipped files 4. \x1d-separated list of manifest lines whose tabs are replaced by \x1es ---Otherwise: manifest line A manifest line has the following format (for single-end reads) <URL>(tab)<Optional MD5>(tab)<Sample label> (for paired-end reads) <URL 1>(tab)<Optional MD5 1>(tab)<URL 2>(tab)<Optional MD5 2>(tab) <Sample label> Hadoop output (written to stdout) ---------------------------- None. Other output (written to directory specified by command-line parameter --push) ____________________________ Files containing input data in one of the following formats: Format 1 (single-end, 3-column): 1. Nucleotide sequence or its reversed complement, whichever is first in alphabetical order 2. 1 if sequence was reverse-complemented else 0 3. Name 4. Quality sequence or its reverse, whichever corresponds to field 1 Format 2 (paired, 2 lines, 3 columns each) (so this is the same as single-end) 1. Nucleotide sequence for mate 1 or its reversed complement, whichever is first in alphabetical order 2. 1 if sequence was reverse-complemented else 0 3. Name for mate 1 4. Quality sequence for mate 1 or its reverse, whichever corresponds to field 1 (new line) 1. Nucleotide sequence for mate 2 or its reversed complement, whichever is first in alphabetical order 2. 1 if sequence was reverse complemented else 0 3. Name for mate 2 4. Quality sequence for mate 2 or its reverse, whichever corresponds to field 1 Quality sequences are strings of Is for FASTA input. nucleotides_per_input: maximum number of nucleotides to put in a given input file gzip_output: True iff preprocessed input should be gzipped gzip_level: level of gzip compression to use push: where to send output verbose: True iff extra debugging statements should be printed to stderr scratch: scratch directory for storing temporary files or None if securely created temporary directory bin_qualities: True iff quality string should be binned according to rules in _mismatch_penalties_to_quality_scores and round_quality_string() defined in go() short_qnames: True iff original qname should be killed and a new qname should be written in a short base64-encoded format skip_bad_records: True iff bad records should be skipped; otherwise, raises exception if bad record is encountered workspace_dir: where to use fastq-dump -- needed for working with dbGaP data. None if temporary dir should be used. fastq_dump_exe: path to fastq-dump executable ignore_missing_sra_samples: does not return error if fastq-dump doesn't find a sample No return value """ if bin_qualities: import math def round_quality_string(qual): """ Bins phred+33 quality string to improve compression. Uses 5-bin scheme that does not affect Bowtie 2 alignments qual: quality string Return value: "binned" quality string. 
""" return ''.join( [str(int( _MN + math.floor((_MX - _MN) * min( ord(qual_char) - 33.0, 40.0 ) / 40.0) )) for qual_char in qual]).translate( _mismatch_penalties_to_quality_scores ) else: def round_quality_string(qual): """ Leaves quality string unbinned and untouched. qual: quality string Return value: qual """ return qual global _input_line_count, _output_line_count skip_stubs = False temp_dir = make_temp_dir(scratch) print >>sys.stderr, 'Created local destination directory "%s".' % temp_dir register_cleanup(tempdel.remove_temporary_directories, [temp_dir]) input_line_count, output_line_count = 0, 0 if not to_stdout: push_url = Url(push) if push_url.is_local: destination = push elif push_url.is_s3 or push_url.is_hdfs or push_url.is_nfs: destination = temp_dir else: raise RuntimeError('Push destination must be ' 'on S3, HDFS, NFS, or local.') fastq_cues = set(['@']) fasta_cues = set(['>', ';']) source_dict = {} onward = False for line in sys.stdin: _input_line_count += 1 if not line.strip(): continue # Kill offset from start of manifest file try: tokens = line.strip().split('\t')[1:] if tokens[0][0] == '#' and tokens[0] != '#!splitload': # Comment line continue except IndexError: # Be robust to bad lines continue token_count = len(tokens) qual_getter = None if tokens[0] == '#!splitload': '''Line specifies precisely how records from files should be placed.''' assert not to_stdout, ('Split manifest line inconsistent with ' 'writing to stdout.') qual_getter = phred_converter(phred_format=tokens[-1]) indexes = tokens[1].split('\x1d') read_counts = tokens[2].split('\x1d') manifest_lines = [token.split('\x1e') for token in tokens[3].split('\x1d')] assert len(indexes) == len(read_counts) == len(manifest_lines) for i, manifest_line in enumerate(manifest_lines): manifest_line_field_count = len(manifest_line) if manifest_line_field_count == 3: source_dict[(Url(manifest_line[0]),)] = ( manifest_line[-1], int(indexes[i]), int(read_counts[i]) ) else: assert manifest_line_field_count == 5 source_dict[(Url(manifest_line[0]), Url(manifest_line[2]))] = ( manifest_line[-1], int(indexes[i]), int(read_counts[i]) ) elif token_count == 3: # SRA or single-end reads source_dict[(Url(tokens[0]),)] = (tokens[-1],) elif token_count == 5: # Paired-end reads source_dict[(Url(tokens[0]), Url(tokens[2]))] = (tokens[-1],) else: # Not a valid line, but continue for robustness continue file_number = 0 for source_urls in source_dict: sample_label = source_dict[source_urls][0] downloaded = set() sources = [] records_printed = 0 if len(source_dict[source_urls]) == 3: skip_count = source_dict[source_urls][1] if len(source_urls) == 2: records_to_consume = source_dict[source_urls][2] if skip_count % 2: skip_count -= 1 records_to_consume += 1 if records_to_consume % 2: records_to_consume -= 1 # Index reads according to order in input to shorten read names read_index = skip_count / 2 # Index reads in pairs else: records_to_consume = source_dict[source_urls][2] read_index = skip_count else: skip_count = 0 records_to_consume = None # Consume all records read_index = 0 assert (records_to_consume >= 0 or records_to_consume is None), ( 'Negative value %d of records to consume encountered.' ) % records_to_consume if records_to_consume == 0: continue skipped = False for source_url in source_urls: if not source_url.is_local: # Download print >>sys.stderr, 'Retrieving URL "%s"...' 
\ % source_url.to_url() if source_url.is_dbgap: download_dir = workspace_dir elif source_url.is_sra: download_dir = temp_dir if source_url.is_sra: sra_accession = source_url.to_url() fastq_dump_command = ( 'set -exo pipefail; cd {download_dir}; ' '{fastq_dump_exe} -I -X 10000 --split-files ' '{sra_accession}' ).format(download_dir=download_dir, fastq_dump_exe=fastq_dump_exe, sra_accession=sra_accession) try: subprocess.check_call( fastq_dump_command, shell=True, executable='/bin/bash', stdout=sys.stderr ) except subprocess.CalledProcessError as e: if e.returncode == 3 and ignore_missing_sra_samples: onward = True break else: raise RuntimeError( ('Error "%s" encountered executing ' 'command "%s".') % (e.output, fastq_dump_command)) import glob sra_fastq_files = sorted( glob.glob(os.path.join(download_dir, '%s[_.]*' % sra_accession)) ) # ensure 1 before 2 if paired-end # Schedule for deletion def silent_remove(filename): try: os.remove(filename) except OSError as e: pass for sra_fastq_file in sra_fastq_files: register_cleanup(silent_remove, sra_fastq_file) sra_file_count = len(sra_fastq_files) check_for_paired = False if sra_file_count == 1: sra_paired_end = False print >>sys.stderr, 'Detected single-end SRA sample.' elif sra_file_count in [2, 3]: print >>sys.stderr, ('2 or 3 FASTQ files detected. ' 'Checking for barcodes...') check_for_paired = True else: raise RuntimeError( ('Unexpected number of files "%d" output ' 'by fastq-dump command "%s".') % (sra_file_count, fastq_dump_command) ) if check_for_paired: # Get max/min read lengths from FASTQ with open( sra_fastq_files[sra_file_count - 2] ) as fastq_stream: max_len, min_len = ( max_min_read_lengths_from_fastq_stream( fastq_stream ) ) print >>sys.stderr, ( 'Max/min read length found in candidate ' 'barcode FASTQ was {}/{}.' ).format(max_len, min_len) if max_len <= _max_stubby_read_length: print >>sys.stderr, ( 'Assumed barcode FASTQ.' ) skip_stubs = True if sra_file_count == 2: sra_paired_end = False else: sra_paired_end = True else: if sra_file_count == 2: sra_paired_end = True else: raise RuntimeError( '3 FASTQs detected, but one of them ' 'was not recognized as containing ' 'barcodes.' 
) # Guess quality from first 10k lines with xopen(None, sra_fastq_files[0]) as source_stream: qual_getter = phred_converter( fastq_stream=source_stream ) for sra_fastq_file in sra_fastq_files: os.remove(sra_fastq_file) sources.append(os.devnull) fastq_dump_command = ( 'set -exo pipefail; cd {download_dir}; ' '{fastq_dump_exe} --split-spot -I --stdout ' '{sra_accession}' ).format(download_dir=download_dir, fastq_dump_exe=fastq_dump_exe, sra_accession=sra_accession) if skip_stubs: fastq_dump_command += ( ' | awk \'BEGIN {{OFS = "\\n"}} ' '{{header = $0; ' 'getline seq; getline qheader; getline qseq; ' 'if (length(seq) > {min_len}) {{print header, ' 'seq, qheader, qseq}}}}\'' ).format(min_len=_max_stubby_read_length) print >>sys.stderr, fastq_dump_command sra_process = subprocess.Popen(fastq_dump_command, shell=True, executable='/bin/bash', stdout=subprocess.PIPE, bufsize=-1) else: mover.get(source_url, temp_dir) downloaded = list( set(os.listdir(temp_dir)).difference(downloaded) ) sources.append(os.path.join(temp_dir, list(downloaded)[0])) else: sources.append(source_url.to_url()) if onward: continue '''Use os.devnull so single- and paired-end data can be handled in one loop.''' if len(sources) == 1: sources.append(os.devnull) if qual_getter is None: # Figure out Phred format with xopen(None, sources[0]) as source_stream: qual_getter = phred_converter(fastq_stream=source_stream) with xopen(None, sources[0]) as source_stream_1, xopen( None, sources[1] ) as source_stream_2: source_streams = [source_stream_1, source_stream_2] reorganize = all([source == os.devnull for source in sources]) if reorganize: # SRA data is live if sra_paired_end: source_streams = [sra_process.stdout, sra_process.stdout] else: source_streams = [sra_process.stdout, open(os.devnull)] break_outer_loop = False while True: if not to_stdout: '''Name files using Hadoop task environment property mapred.task.partition.''' if gzip_output: try: output_file = os.path.join( destination, '.'.join([ os.environ['mapred_task_partition'], str(file_number), 'gz' ]) ) except KeyError: '''Hadoop 2.x: mapreduce.task.partition; see http://hadoop.apache.org/docs/r2.0.3-alpha/ hadoop-project-dist/hadoop-common/ DeprecatedProperties.html.''' output_file = os.path.join( destination, '.'.join([ os.environ['mapreduce_task_partition'], str(file_number), 'gz' ]) ) open_args = [output_file, 'a', gzip_level] else: try: output_file = os.path.join( destination, '.'.join([ os.environ['mapred_task_partition'], str(file_number) ]) ) except KeyError: output_file = os.path.join( destination, '.'.join([ os.environ['mapreduce_task_partition'], str(k), str(file_number) ]) ) open_args = [output_file, 'a'] try: os.makedirs(os.path.dirname(output_file)) except OSError: pass else: open_args = [] '''Use xopen to handle compressed streams and normal streams generally.''' with xopen(gzip_output if not to_stdout else '-', *open_args) \ as output_stream: perform_push = False line_numbers = [0, 0] read_next_line = True nucs_read = 0 pairs_read = 0 while True: if read_next_line: # Read next line only if FASTA mode didn't already lines = [] for source_stream in source_streams: lines.append(source_stream.readline()) read_next_line = True if not lines[0]: break_outer_loop = True break line_numbers = [i + 1 for i in line_numbers] lines = [line.strip() for line in lines] bad_record_skip = False if lines[0][0] in fastq_cues: if records_to_consume and not skipped: '''Skip lines as necessary; for paired-end reads skip the largest even number of records less than 
records_to_consume.''' if len(source_urls) == 1: # single-end line_skip_count = max( skip_count * 4 - 1, 0 ) else: # paired-end line_skip_count = max( ((skip_count / 2) * 4 - 1), 0 ) for _ in xrange(line_skip_count): next(source_stream_2) for _ in xrange(line_skip_count): next(source_stream_1) if skip_count: lines = [] for source_stream in source_streams: lines.append(source_stream.readline()) if not lines[0]: break_outer_loop = True break lines = [line.strip() for line in lines] skipped = True seqs = [source_stream.readline().strip() for source_stream in source_streams] line_numbers = [i + 1 for i in line_numbers] plus_lines = [source_stream.readline().strip() for source_stream in source_streams] line_numbers = [i + 1 for i in line_numbers] quals = [source_stream.readline().strip() for source_stream in source_streams] if reorganize and sra_paired_end: # Fix order! lines, seqs, plus_lines, quals = ( [lines[0], plus_lines[0]], [lines[1], plus_lines[1]], [seqs[0], quals[0]], [seqs[1], quals[1]] ) try: assert plus_lines[0][0] == '+', ( 'Malformed read "%s" at line %d of ' 'file "%s".' ) % (lines[0], line_numbers[0], sources[0]) if plus_lines[1]: assert plus_lines[1][0] == '+', ( 'Malformed read "%s" at line %d ' 'of file "%s".' ) % ( lines[1], line_numbers[1], sources[1] ) try: # Kill spaces in name original_qnames = \ [line[1:].replace(' ', '_') for line in lines] except IndexError: raise RuntimeError( 'Error finding QNAME at ' 'line %d of either %s or %s' % ( sources[0], sources[1] ) ) except (AssertionError, IndexError, RuntimeError) as e: if skip_bad_records: print >>sys.stderr, ('Error "%s" ' 'encountered; skipping bad record.' ) % e.message for source_stream in source_streams: source_stream.readline() line_numbers = [ i + 1 for i in line_numbers ] bad_record_skip = True else: raise else: try: quals = [ qual_getter(qual) for qual in quals ] except Exception as e: if skip_bad_records: print >>sys.stderr, ( 'Error "%s" encountered ' 'trying to convert quality ' 'string to Sanger format; ' 'skipping bad record.' ) % e.message bad_record_skip = True else: raise line_numbers = [i + 1 for i in line_numbers] try: for i in xrange(2): assert len(seqs[i]) == len(quals[i]), ( 'Length of read sequence does not ' 'match length of quality string ' 'at line %d of file "%s".' ) % (line_numbers[i], sources[i]) except (AssertionError, IndexError) as e: if skip_bad_records: print >>sys.stderr, ( 'Error "%s" encountered; ' 'skipping bad record.' ) % e.message bad_record_skip = True else: raise elif lines[0][0] in fasta_cues: seqs = [[], []] next_lines = [] for p, source_stream in enumerate(source_streams): while True: next_line \ = source_stream.readline().strip() try: if next_line[0] in fasta_cues: break else: try: seqs[p].append(next_line) except IndexError: raise except IndexError: break next_lines.append(next_line) seqs = [''.join(seq) for seq in seqs] line_numbers = [i + 1 for i in line_numbers] try: try: # Kill spaces in name original_qnames = \ [line[1:].replace(' ', '_') for line in lines] except IndexError: raise RuntimeError( 'Error finding QNAME at ' 'line %d of either %s or %s' % ( sources[0], sources[1] ) ) except (AssertionError, IndexError, RuntimeError) as e: if skip_bad_records: print >>sys.stderr, ('Error "%s" ' 'encountered; skipping bad record.' 
) % e.message for source_stream in source_streams: source_stream.readline() line_numbers = [ i + 1 for i in line_numbers ] bad_record_skip = True else: raise else: try: quals = [ 'h'*len(seq) for seq in seqs ] except Exception as e: if skip_bad_records: print >>sys.stderr, ( 'Error "%s" encountered ' 'trying to convert quality ' 'string to Sanger format; ' 'skipping bad record.' ) % e.message bad_record_skip = True else: raise line_numbers = [i + 1 for i in line_numbers] lines = next_lines read_next_line = False if bad_record_skip: seqs = [] # Fake record-printing to get to records_to_consume if source_streams[-1].name == os.devnull: records_printed += 1 else: records_printed += 2 elif len(original_qnames) == 2 and original_qnames[1]: # Paired-end write if original_qnames[0] == original_qnames[1]: # Add paired-end identifiers original_qnames[0] += '/1' original_qnames[1] += '/2' assert seqs[1] assert quals[1] seqs = [seq.upper() for seq in seqs] reversed_complement_seqs = [ seqs[0][::-1].translate( _reversed_complement_translation_table ), seqs[1][::-1].translate( _reversed_complement_translation_table ) ] if seqs[0] < reversed_complement_seqs[0]: left_seq = seqs[0] left_qual = quals[0] left_reversed = '0' else: left_seq = reversed_complement_seqs[0] left_qual = quals[0][::-1] left_reversed = '1' if seqs[1] < reversed_complement_seqs[1]: right_seq = seqs[1] right_qual = quals[1] right_reversed = '0' else: right_seq = reversed_complement_seqs[1] right_qual = quals[1][::-1] right_reversed = '1' if short_qnames: left_qname_to_write = encode(read_index) + '/1' right_qname_to_write = encode( read_index ) + '/2' else: left_qname_to_write = original_qnames[0] right_qname_to_write = original_qnames[1] print >>output_stream, '\t'.join( [ left_seq, left_reversed, qname_from_read( left_qname_to_write, seqs[0] + quals[0], sample_label, mate=seqs[1] ), '\n'.join([ round_quality_string( left_qual ), right_seq ]), right_reversed, qname_from_read( right_qname_to_write, seqs[1] + quals[1], sample_label, mate=seqs[0] ), round_quality_string(right_qual) ] ) records_printed += 2 _output_line_count += 1 else: seqs[0] = seqs[0].upper() reversed_complement_seqs = [ seqs[0][::-1].translate( _reversed_complement_translation_table ) ] # Single-end write if seqs[0] < reversed_complement_seqs[0]: seq = seqs[0] qual = quals[0] is_reversed = '0' else: seq = reversed_complement_seqs[0] qual = quals[0][::-1] is_reversed = '1' if short_qnames: qname_to_write = encode(read_index) else: qname_to_write = original_qnames[0] print >>output_stream, '\t'.join( [ seq, is_reversed, qname_from_read( qname_to_write, seqs[0] + quals[0], sample_label ), round_quality_string(qual) ] ) records_printed += 1 _output_line_count += 1 read_index += 1 for seq in seqs: nucs_read += len(seq) if records_printed == records_to_consume: break_outer_loop = True perform_push = True break if not to_stdout and not records_to_consume and \ nucs_read > nucleotides_per_input: file_number += 1 break if verbose: print >>sys.stderr, ( 'Exited with statement; line numbers are %s' % line_numbers ) if (not to_stdout) and (push_url.is_nfs or push_url.is_s3 or push_url.is_hdfs) \ and ((not records_to_consume) or (records_to_consume and perform_push)): print >>sys.stderr, 'Pushing "%s" to "%s" ...' 
% ( output_file, push_url.to_url() ) print >>sys.stderr, 'reporter:status:alive' mover.put(output_file, push_url.plus(os.path.basename( output_file ))) try: os.remove(output_file) except OSError: pass if break_outer_loop: break if verbose: print >>sys.stderr, 'Exiting source streams...' if verbose: print >>sys.stderr, 'Exited source streams.' # Clear temporary directory for input_file in os.listdir(temp_dir): try: os.remove(os.path.join(temp_dir, input_file)) except OSError: pass if 'sra_process' in locals(): sra_process.stdout.close() sra_return_code = sra_process.wait() if sra_return_code > 0: raise RuntimeError(('fastq-dump terminated with exit ' 'code %d. Command run was "%s".') % (sra_return_code, fastq_dump_command)) del sra_process
cluster_size = 8

# column 1: number of pieces
# column 2: size of a bitfield message
# column 3: size of compressed bitfield message (worst case)
# column 4: size of compressed bitfield message (realistic)

bitfield = ['0'] * num_pieces
for i in xrange(num_pieces + 1):
    scattered_pieces = gen_scattered_pieces(i, 0, num_pieces)
    for pos in scattered_pieces:
        bitfield[pos] = '1'
    # divide by 3 because each byte is HEX encoded with spaces in between
    scatter_size = len(encode.encode(''.join(bitfield))) / 3
    realistic_size = len(encode.encode_pieces(
        gen_realistic_pieces(i, num_pieces, cluster_size))) / 3
    out.write('%d\t%d\t%d\t%d\n' % (i,
        5 + (num_pieces + 7) / 8,
        4 + 2 + scatter_size,
        4 + 2 + realistic_size))
    # print ''.join(bitfield)
out.close()

f = open('bitfield-sizes.gnuplot', 'w+')
f.write('''
set term png small size 640,480
set output "bitfield-sizes.png"
def main( self, url, ie_key, subtitles, filename, filedesc, downloadkey, convertkey, username, oauth ): """Main worker code.""" # Get a lock to prevent double-running with same task ID lockkey = 'tasklock:' + self.request.id if redisconnection.exists(lockkey): raise TaskError("Task has already been run") else: redisconnection.setex(lockkey, 'T', 7 * 24 * 3600) # Generate temporary directory for task for i in range(10): # 10 tries id = os.urandom(8).encode('hex') outputdir = '/srv/v2c/output/' + id if not os.path.isdir(outputdir): os.mkdir(outputdir) break else: raise TaskError("Too many retries to generate a task id") s = Stats() def statuscallback(text, percent): if text is not None: s.text = text if percent is not None: s.percent = percent print '%d: %s' % (s.percent, s.text) self.update_state( state='PROGRESS', meta={'text': s.text, 'percent': s.percent} ) def errorcallback(text): raise TaskError(text) try: statuscallback('Downloading...', -1) d = download.download( url, ie_key, downloadkey, subtitles, outputdir, statuscallback, errorcallback ) if not d: errorcallback('Download failed!') file = d['target'] if not file: errorcallback('Download failed!') subtitles = subtitles and d['subtitles'] statuscallback('Converting...', -1) file = encode.encode(file, convertkey, statuscallback, errorcallback) if not file: errorcallback('Convert failed!') ext = file.split('.')[-1] statuscallback('Configuring Pywikibot...', -1) pywikibot.config.authenticate['commons.wikimedia.org'] = \ (consumer_key, consumer_secret) + oauth pywikibot.config.usernames['commons']['commons'] = username pywikibot.Site('commons', 'commons', user=username).login() statuscallback('Uploading...', -1) filename += '.' + ext filename, wikifileurl = upload.upload( file, filename, url, http_host, filedesc, username, statuscallback, errorcallback ) if not wikifileurl: errorcallback('Upload failed!') if subtitles: statuscallback('Uploading subtitles...', -1) try: subtitleuploader.subtitles( subtitles, filename, username, statuscallback, errorcallback ) except Exception, e: statuscallback(type(e).__name__ + ": " + str(e), None) print e pass except pywikibot.Error: # T124922 workaround exc_info = sys.exc_info() raise TaskError( ( u'pywikibot.Error: %s: %s' % ( exc_info[0].__name__, exc_info[1] ) ).encode('utf-8')), None, exc_info[2] else: statuscallback('Done!', 100) return filename, wikifileurl finally: statuscallback('Cleaning up...', -1) pywikibot.config.authenticate.clear() pywikibot.config.usernames['commons'].clear() pywikibot._sites.clear() shutil.rmtree(outputdir)