def test_decode_msg_random_key():
    """Decrypting with an arbitrary substitution key yields the plaintext."""
    # The key is written aligned with the alphabet
    # 'abcdefghijklmnopqrstuvwxyz'
    substitution_key = '!)"(£*%&><@abcdefghijklmno'
    expected = 'calendario'
    decrypted = Coder(substitution_key).decrypt('"!a£c(!g>d')
    assert decrypted == expected
def build(self, checker):
    """Encode all fix records of ``checker`` and write the training file.

    Every record fetched from the database is encoded twice (bug fragment
    and fix fragment).  Records that contain ``-1`` in either encoding, or
    that exceed the limits returned by ``getModelStatsForChecker``, are
    only reported; all other records are written as one JSON object per
    line (keys ``'x'`` and ``'y'``) to the file named by
    ``config.cfTrainFilenameFormat``.

    Args:
        checker: checker name; used for the dictionary, the database query
            and the output file name.

    Returns:
        None.  Returns early when the database yields no records.
    """
    # Initialize coder
    print("Initializing coder...")
    self.dictionary = Dictionary(checker)
    self.coder = Coder(self.dictionary)
    # Load all data from DB
    print("Fetching data from database...")
    allData = self.db.getFixDataForChecker(checker)
    allDataLen = len(allData)
    print("Done, fetched {0} records".format(allDataLen))
    if allDataLen < 1:
        print("No data found")
        return
    # Encode all data
    print("Encoding all data and writing to output file...")
    i = 0
    # Upper bounds for encoded bug length, fix length and unknown-list length.
    (maxBug, maxFix, maxUnk) = self.checkers.getModelStatsForChecker(checker)
    with open(config.cfTrainFilenameFormat.format(checker), 'w') as f:
        while i < allDataLen:
            # NOTE(review): column 4 is presumably the checker message and
            # columns 1 / 2 the bug / fix code - confirm against the DB schema.
            checkerInfo = self.checkers.extractTokensForChecker(
                checker, allData[i][4])
            encodedBugData, initialUnkList = self.coder.encode(
                allData[i][1], checkerData=checkerInfo)
            # The fix is encoded with the unknown list produced for the bug.
            encodedFixData, finalUnkList = self.coder.encode(
                allData[i][2], unkList=initialUnkList, reverse=False)
            if -1 in encodedBugData:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (bug), ignoring (lenUnk = {1})"
                    .format(i + 1, len(finalUnkList), len(encodedBugData),
                            len(encodedFixData)))
            elif -1 in encodedFixData:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (fix), ignoring (lenUnk = {1})"
                    .format(i + 1, len(finalUnkList), len(encodedBugData),
                            len(encodedFixData)))
            elif len(encodedBugData) > maxBug or len(
                    encodedFixData) > maxFix or len(finalUnkList) > maxUnk:
                # Record is well-formed but larger than the model limits.
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (lengths), ignoring (lenUnk = {1})"
                    .format(i + 1, len(finalUnkList), len(encodedBugData),
                            len(encodedFixData)))
            else:
                print("{0}: [{2} - {3} ({1})] Done (lenUnk = {1})".format(
                    i + 1, len(finalUnkList), len(encodedBugData),
                    len(encodedFixData)))
                # One JSON document per line.
                f.write(
                    json.dumps({
                        'x': encodedBugData,
                        'y': encodedFixData
                    }) + '\n')
            i += 1
            # Progress goes to stderr, separate from the per-record log on stdout.
            print('Done {0}'.format(i), file=sys.stderr)
    print("All done, exiting...")
class TestCoder(unittest.TestCase):
    """Round-trip check of the coder against the stored fix data.

    Records whose encoded bug or fix data contain ``-1`` are only reported;
    every other record must decode back to the original text.
    """

    @classmethod
    def setUpClass(self):
        # Resources shared by all tests of this class.
        print("Starting up...")
        self.db = CFDatabase(config.getCfDbFile())
        self.checkers = Checkers()
        self.checkerList = ['deadcode.DeadStores']
        self.checkerIndex = 0

    def setUp(self):
        # Build the coder for the checker selected by ``checkerIndex``.
        print("Initializing coder...")
        self.checker = self.checkerList[self.checkerIndex]
        self.dictionary = Dictionary(self.checker)
        self.coder = Coder(self.dictionary)
        # Pull every fix record of that checker from the database.
        print("Fetching data from database...")
        self.allData = self.db.getFixDataForChecker(self.checker)
        self.allDataLen = len(self.allData)
        print("Done, fetched {0} records".format(self.allDataLen))

    def tearDown(self):
        self.checkerIndex += 1

    def testDeadcodeDeadStores(self):
        self.assertTrue(self.allDataLen > 0, msg="No data found")
        print("Testing encoding")
        for index, record in enumerate(self.allData):
            checkerInfo = self.checkers.extractTokensForChecker(
                self.checker, record[4])
            bugCodes, bugUnknowns = self.coder.encode(
                record[1], checkerData=checkerInfo)
            fixCodes, unknowns = self.coder.encode(
                record[2], unkList=bugUnknowns, reverse=False)
            # Arguments shared by all three report formats.
            stats = (index + 1, len(unknowns), len(bugCodes), len(fixCodes))
            if -1 in bugCodes:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (bug), ignoring (lenUnk = {1})"
                    .format(*stats))
                continue
            if -1 in fixCodes:
                print(
                    "{0}: [{2} - {3} ({1})] Some tokens were not parsed (fix), ignoring (lenUnk = {1})"
                    .format(*stats))
                continue
            print("{0}: [{2} - {3} ({1})] Done (lenUnk = {1})".format(*stats))
            # Fully encoded record: decoding must reproduce the input.
            textBug = self.coder.decode(bugCodes, unknowns, True)
            textFix = self.coder.decode(fixCodes, unknowns)
            self.assertEqual(textBug, record[1])
            self.assertEqual(textFix, record[2])
        print("All done.")
def setUp(self):
    """Create the coder for the current checker and load its fix records."""
    print("Initializing coder...")
    checker = self.checkerList[self.checkerIndex]
    self.checker = checker
    dictionary = Dictionary(checker)
    self.dictionary = dictionary
    self.coder = Coder(dictionary)

    print("Fetching data from database...")
    records = self.db.getFixDataForChecker(checker)
    self.allData = records
    self.allDataLen = len(records)
    print("Done, fetched {0} records".format(self.allDataLen))
def encode_clicked(self, widget):
    """Encode the input buffer with the codec selected in the combo box.

    Index 1 selects URL encoding and index 0 selects base64; any other
    selection leaves both buffers untouched.
    """
    selected = self.name_combo.get_active()
    if selected not in (0, 1):
        return

    source = self.textbuffer
    first = source.get_start_iter()
    last = source.get_end_iter()
    text = source.get_text(first, last, True)

    encoder = Coder()
    if selected == 1:
        encoded = encoder.url_encode(text)
    else:
        encoded = encoder.base64_encode(text)
    self.textbuffer_result.set_text(encoded)
def __init__(self, N, mask_frac, stabil=10):
    """Set up the three N x N layers, the gating masks and weight matrices.

    Args:
        N: side length of each layer; a layer holds ``N**2`` units.
        mask_frac: divisor applied to the layer size to obtain ``mask_size``.
        stabil: stored unchanged on the instance.
    """
    size = N**2
    pad = 0.0001

    self.stabil = stabil
    self.mask_frac = mask_frac
    self.size = size
    self.mask_size = int(self.size / self.mask_frac)

    ### Create layers
    self.act = tanh_activator(pad, size)
    #self.act_mask = gate_activator(pad, size)
    #self.act_mask = logistic_activator(pad, size)
    self.act_mask = heaviside_activator(size)

    def make_layer(label):
        # Every layer shares the activator but owns a separate coder.
        return Layer(label, (N, N), self.act, Coder(self.act))

    reg, mem, ptr = make_layer("r"), make_layer("m"), make_layer("p")
    self.reg_layer = reg
    self.mem_layer = mem
    self.ptr_layer = ptr

    square = (size, size)

    # Gating masks
    self.masks = {}
    self.w_mask = np.zeros(square)

    # Weight matrices
    self.w_mm = np.zeros(square)
    self.w_pm = np.zeros(square)
    self.w_mp = np.zeros(square)

    # Dummy bias to avoid extra memory allocation
    self.dummy_bias = np.zeros((size, 1))
def __init__(self,
             layer_shape,
             pad,
             activator,
             learning_rule,
             register_names,
             shapes=None,
             tokens=None,
             orthogonal=False,
             verbose=False):
    """Create the default registers and the underlying ``NVMNet``.

    Args:
        layer_shape: two-element shape; its product is the layer size.
        pad: padding value handed to ``activator`` and to ``NVMNet``.
        activator: callable ``activator(pad, layer_size)`` returning the
            activator shared by all default registers.
        learning_rule: forwarded to ``NVMNet``.
        register_names: names of the registers to create.
        shapes: forwarded to ``NVMNet``; a fresh empty dict when omitted.
        tokens: stored on the instance and forwarded to ``NVMNet``; a fresh
            empty list when omitted.
        orthogonal: stored on the instance and forwarded to ``NVMNet``.
        verbose: forwarded to ``NVMNet``.
    """
    # Mutable defaults are created per call: a literal ``{}`` / ``[]`` in the
    # signature would be shared by every instance built without the argument,
    # and ``self.tokens`` would alias that shared list.
    shapes = {} if shapes is None else shapes
    tokens = [] if tokens is None else tokens

    self.tokens = tokens
    self.orthogonal = orthogonal
    self.register_names = register_names

    # default registers
    layer_size = layer_shape[0] * layer_shape[1]
    act = activator(pad, layer_size)
    registers = {
        name: Layer(name, layer_shape, act, Coder(act))
        for name in register_names
    }

    self.net = NVMNet(layer_shape,
                      pad,
                      activator,
                      learning_rule,
                      registers,
                      shapes=shapes,
                      tokens=tokens,
                      orthogonal=orthogonal,
                      verbose=verbose)
def get_postions_and_labels(align, ref, region):
    """
    Collect the positions and encoded labels of an alignment inside a region.

    Parameters
    ----------
    align : align for which positions and labels are required
    ref : corresponding reference sequence
    region : corresponding region; a ``None`` start or end leaves that
        side unbounded

    Returns
    -------
    (positions, labels) : two parallel lists. Each position is a tuple
        ``(last reference position seen, number of consecutive pairs
        without a reference position)``; each label is the ``Coder``
        encoding of the upper-cased query base, of ``Coder.GAP`` when the
        query base is empty, or of ``Coder.UNKNOWN`` when encoding raises
        ``KeyError``.
    """
    lower, upper = region.start, region.end
    lower = 0 if lower is None else lower
    upper = float('inf') if upper is None else upper
    # Clip the requested window to the extent of the alignment.
    lower = max(lower, align.start)
    upper = min(upper, align.end)

    def before_window(p):
        # Leading pairs without a reference position or left of the window.
        return (p.ref_position is None) or (p.ref_position < lower)

    positions, labels = [], []
    anchor = None
    inserts = 0
    for pair in itertools.dropwhile(before_window,
                                    get_pairs(align.align, ref)):
        ref_pos = pair.ref_position
        if ref_pos == align.align.reference_end:
            break
        if ref_pos is not None and ref_pos >= upper:
            break

        if ref_pos is None:
            inserts += 1
        else:
            anchor, inserts = ref_pos, 0
        positions.append((anchor, inserts))

        symbol = pair.query_base.upper() if pair.query_base else Coder.GAP
        try:
            code = Coder.encode(symbol)
        except KeyError:
            code = Coder.encode(Coder.UNKNOWN)
        labels.append(code)

    return positions, labels
def main(app): codes = create_codes(app) cc = Coder.create_from_codes(codes) for rel_path, code in cc.itercode(): print "\n\n============ %s ============\n" % rel_path, code #if rel_path.endswith('.py'): # print check(code, 'test.py') return (codes, cc)
def test_image_base64_encode_with_dataurl(self):
    """The one-pixel PNG is encoded into the expected ``<img>`` tag."""
    expected_tag = (
        '<img src="'
        'AAAEAAAABCAIAAACQd1PeAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjw'
        'v8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAAMSURBVBhXY2BgYAAAA'
        'AQAAVzN/2kAAAAASUVORK5CYII=" >'
    )
    # Second argument switches the data-URL output on.
    actual_tag = Coder().image_base64_encode('images/One_black_Pixel.png',
                                             True)
    self.assertEqual(actual_tag, expected_tag)
def ric_encode(imgbuf, config):
    """Encode an in-memory image and return the boxed output.

    The result is the "FTYP" box, then the "ILOT" box built from the layer
    offset table, then the wrapped layers.
    """
    image = Image.open(StringIO(imgbuf))
    wrapped_layers, offsetTable = wrapper.wrapLayers(
        Coder().encode(image, config))

    ftyp_box = iso_media.write_box("FTYP", "RIC ")
    ilot_box = iso_media.write_box("ILOT", write_layer_offsets(offsetTable))
    return "" + ftyp_box + ilot_box + wrapped_layers
def _send_message(self):
    """Ask for a chat id and a message, then send the framed message."""
    entered = input('Enter chat id:\n')
    # An empty answer falls back to LISTENER.
    self.chat_id = entered if entered else LISTENER

    text = input('Enter message:\n')
    # The encoded message is sent between the start and end markers.
    self._send_bits(START_MESSAGE)
    self._send_bits(Coder.encode(text))
    self._send_bits(END_MESSAGE)
def main():
    """Run the interactive encode/decode loop.

    Each round asks for a command: ``q`` quits, ``0`` encodes the next input
    with ``Coder.msg_to_code`` and anything else decodes it with
    ``Coder.code_to_msg``.  The result is printed and handed to ``pbcopy``.
    """
    import subprocess
    import sys

    print(_welcome)
    while True:
        mode = input('\nCommand: ')
        if mode == 'q':
            return

        encoding = mode == '0'
        msg = input('Encode: ' if encoding else 'Decode: ')
        if encoding:
            output = Coder.msg_to_code(msg)
        else:
            output = Coder.code_to_msg(msg)
        # The printed suffix is the Chinese for "copied".
        print(' "{}" 已复制'.format(output))

        # Pass the text to pbcopy on stdin instead of interpolating it into a
        # shell command: quotes, "$" or backticks in the text would otherwise
        # be interpreted by the shell.  The trailing newline matches what
        # `echo` used to append.
        payload = '{}\n'.format(output).encode('utf-8')
        try:
            subprocess.run(['pbcopy'], input=payload, check=False)
        except OSError as exc:
            # pbcopy missing or not executable: report it and keep going.
            print('pbcopy failed: {}'.format(exc), file=sys.stderr)
        print('')
def main(variant):
    """Build the index in two passes over the documents.

    Pass ``c`` keeps only the words whose length modulo 2 equals ``c``,
    encodes each word's list of document numbers with ``Coder(variant)``
    and pickles the result to ``index<c>.pkl``.  Document URLs are collected
    during the first pass and pickled to ``paths.pkl``; the variant name is
    written to the file ``variant``.
    """
    with open('variant', 'w') as variant_file:
        variant_file.write(variant)

    encoder = Coder(variant)
    paths = []
    chunk_count = 2

    for chunk in range(chunk_count):
        postings = {}
        documents = docreader.DocumentStreamReader(
            docreader.parse_command_line().files)
        # Documents are numbered from 1 in reading order.
        for doc_id, doc in enumerate(documents, start=1):
            if chunk == 0:
                paths.append(doc.url)
            for word in set(doc2words.extract_words(doc.text)):
                known = postings.get(word)
                if known is not None:
                    known.append(doc_id)
                elif len(word) % chunk_count == chunk:
                    postings[word] = array('l', [doc_id])

        encoded = {
            word: encoder.encode(doc_ids)
            for word, doc_ids in postings.items()
        }
        with open('index{}.pkl'.format(chunk), 'wb') as index_file:
            pickle.dump(encoded, index_file)

    with open('paths.pkl', 'wb') as paths_file:
        pickle.dump(paths, paths_file)
def inference(args):
    """Run the model over the inference data and write a FASTA file.

    For every contig the predicted symbols are counted per position key
    (a pair built from the two components of each position entry).  The
    output sequence is the contig's original prefix, the most common symbol
    of every kept position (``Coder.GAP`` winners are omitted), and the
    contig's original suffix.

    Args:
        args: object providing ``model_path``, ``data_path``, ``batch_size``,
            ``num_workers`` and ``out_path``.
    """
    use_cuda = torch.cuda.is_available()
    device = 'cuda:0' if use_cuda else 'cpu'
    long_type = torch.cuda.LongTensor if use_cuda else torch.LongTensor

    model = RNN.load_from_checkpoint(args.model_path).to(device)
    dataset = InferenceDataset(args.data_path)
    loader = DataLoader(dataset,
                        args.batch_size,
                        num_workers=args.num_workers)

    # votes[contig][position key] -> Counter of predicted symbols
    votes = defaultdict(lambda: defaultdict(Counter))

    print('>> started inference')
    for contig_names, batch_positions, X in loader:
        scores = model(X.type(long_type))
        predicted = torch.argmax(scores, dim=2).long().cpu().numpy()
        for name, pos_row, pred_row in zip(contig_names, batch_positions,
                                           predicted):
            for pos, code in zip(pos_row, pred_row):
                symbol = Coder.decode(code)
                key = (pos[0].item(), pos[1].item())
                votes[name][key][symbol] += 1

    print('>> started processing of results')
    reference = dataset.contigs
    records = []
    for name, per_position in votes.items():
        ordered = sorted(per_position)
        # Drop leading keys whose second component is non-zero.
        ordered = list(
            itertools.dropwhile(lambda key: key[1] != 0, ordered))

        first = ordered[0][0]
        contig_data = reference[name]
        polished = contig_data[0][:first]
        for key in ordered:
            winner = per_position[key].most_common(1)[0][0]
            if winner != Coder.GAP:
                polished += winner
        last = ordered[-1][0]
        polished += contig_data[0][last + 1:]

        records.append(SeqRecord.SeqRecord(Seq(polished), id=name))

    with open(args.out_path, 'w') as fasta:
        SeqIO.write(records, fasta, 'fasta')
def make_nvmnet(programs=None, registers=None):
    """Build an ``NVMNet``, assemble and link the programs, and return it.

    Args:
        programs: dict mapping program name to program text; a single
            built-in "test" program is used when omitted.
        registers: dict mapping register name to ``Layer``; three registers
            "d0".."d2" are created when omitted.

    Returns:
        The linked ``NVMNet`` with its "ip" activity set to the encoding of
        the last program name iterated.
    """
    # default program
    if programs is None:
        # NOTE(review): the line layout inside this literal is significant to
        # the assembler - confirm it against the expected program syntax.
        programs = {
            "test":
            """
    mov d2 true
    loop: mov d1 here
    jmp d1
    here: mov d0 d2
    exit
    """
        }
    # set up activator
    activator, learning_rule = logistic_activator, hebbian
    # activator, learning_rule = tanh_activator, hebbian
    # make network
    layer_shape = (16, 16)
    layer_size = layer_shape[0] * layer_shape[1]
    pad = 0.01
    act = activator(pad, layer_size)
    # default devices
    # changing devices to registers
    if registers is None:
        # All default registers share one activator; each gets its own coder.
        registers = {
            "d%d" % d: Layer("d%d" % d, layer_shape, act, Coder(act))
            for d in range(3)
        }
    # assemble and link programs
    # changing devices to registers
    nvmnet = NVMNet(layer_shape, pad, activator, learning_rule, registers)
    for name, program in programs.items():
        nvmnet.assemble(program, name, verbose=1)
    nvmnet.link(verbose=1)
    # initialize pointer at last program
    # `name` is the loop variable left over from the loop above, so an empty
    # `programs` dict would raise NameError here.
    nvmnet.activity["ip"] = nvmnet.layers["ip"].coder.encode(name)
    return nvmnet
def choose_image_clicked(self, widget):
    """Let the user pick an image and show its base64 encoding.

    On OK the chosen file is encoded with ``Coder().image_base64_encode``
    (honouring ``self.dataurl``), the input buffer reports the file name and
    the result buffer receives the encoded text.  On CANCEL the input buffer
    shows "Canceled".  The dialog is destroyed in every case.
    """
    dialog = Gtk.FileChooserDialog(
        "Please choose an image", self, Gtk.FileChooserAction.OPEN,
        (Gtk.STOCK_CANCEL, Gtk.ResponseType.CANCEL, Gtk.STOCK_OPEN,
         Gtk.ResponseType.OK))
    try:
        self.add_filters(dialog)
        response = dialog.run()

        if response == Gtk.ResponseType.OK:
            filename = dialog.get_filename()
            encoded = Coder().image_base64_encode(filename, self.dataurl)
            # Update the input buffer first so the result buffer is written
            # last and ends up holding the encoded text.
            self.textbuffer.set_text("Image encoded: " + filename)
            self.textbuffer_result.set_text(encoded)
        elif response == Gtk.ResponseType.CANCEL:
            self.textbuffer.set_text("Canceled")
    finally:
        # Without this, a failure while encoding would leave the dialog open.
        dialog.destroy()
def test_digits(verbose=False):
    """Train a three-layer ``CHL_Net`` on the token pairs "0".."99"."""
    epochs = 1000
    pad = 0.0001
    layer_sizes = [256] * 3

    # Positional flags: feedback, split_learn, biases.
    net = CHL_Net(layer_sizes, True, True, True)

    # Input/output pattern pairs (0-99)
    in_coder = Coder(tanh_activator(pad, layer_sizes[0]))
    out_coder = Coder(tanh_activator(pad, layer_sizes[-1]))
    patterns = []
    for number in range(100):
        token = str(number)
        patterns.append((in_coder.encode(token), out_coder.encode(token)))

    net.train(epochs, patterns, verbose=verbose)
def predict(self, id, checker):
    """Predict a fix for one bug, or screen all bugs of ``checker``.

    Args:
        id: bug id to process.  With ``-1`` every bug of ``checker`` is
            visited, but only a "too big" / "Good to go" line is printed for
            each; no prediction is made in that mode.
        checker: checker name used to load the model and the dictionary.

    For a single bug the predicted fix is printed, optionally verified
    (``config.cfVerifyPrediction``) and applied after an interactive y/n
    prompt.
    """
    # Load all bugs
    print("Loading bug data...")
    ids = []
    if id == -1:
        bugs = self.ccdb.getAllBugsForChecker(checker)
        ids = [x[0] for x in bugs]
    else:
        ids.append(id)
    # Loading model
    print("Loading model...")
    model = load_model(config.cfModelFilenameFormat.format(checker))
    model.summary()
    # Indexed by vStatus: 0 = not fixed, 1 = fixed, 2 = verification skipped.
    vLabels = ['NOT OK', 'OK', 'Skipped']
    # Initialize coder
    print("Initializing coder...")
    self.dictionary = Dictionary(checker)
    self.coder = Coder(self.dictionary)
    self.totalDictionaryLength = self.dictionary.length()
    # Predicting
    print("Starting predictions...")
    for i in ids:
        allData = self.ccdb.getBugData(i)
        if allData.getChecker(
        ) not in globals.availableCheckers or allData.getChecker(
        ) != checker:
            print("Bug #{0} - checker not supported".format(i))
        else:
            # Load extra tokens from checker message
            checkerInfo = self.checkers.extractTokensForChecker(
                allData.getChecker(), allData.getMessage())
            # Retrieve code fragment with bug
            fileRelativePath = self.convertFilePathToRepoRelativePath(
                allData.getFile())
            fullCodeWithBug = self.vcs.getFileContents(
                fileRelativePath, self.commits[self.currentCommitIndex])
            extractor = CodeExtractor(allData)
            extractor.loadCodeFromText(fullCodeWithBug)
            extractor.extractBugCode()
            bugCodeFragment = extractor.getBugCodeFragment()
            fixCodeFragment = ''
            # Encode it
            encodedBugData, initialUnkList = self.coder.encode(
                bugCodeFragment, checkerData=checkerInfo)
            # Convert to one-hot
            # The model's first layer fixes the maximum input length.
            MODEL_X_MAX_LEN = model.get_layer(index=0).input_shape[1]
            if len(encodedBugData) > MODEL_X_MAX_LEN:
                print(
                    "Bug #{0} - Code too big for model, ignored".format(i))
                continue
            elif id == -1:
                # Screening mode: report that the bug fits and move on.
                print("Bug #{0} - Good to go".format(i))
                continue
            noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
            if noZerosToPad > 0:
                encodedBugData = self.coder.applyPadding(
                    encodedBugData, noZerosToPad)
            # Batch of one sample.
            X = np.zeros((1, MODEL_X_MAX_LEN, self.totalDictionaryLength))
            X[0] = self.coder.convertToOneHot(
                encodedBugData,
                np.zeros((MODEL_X_MAX_LEN, self.totalDictionaryLength)))
            # Predict and convert from one-hot
            Y = self.coder.convertFromOneHot(model.predict(X)[0])
            print(Y)
            # Decode
            Y = self.coder.removePadding(Y)
            fixCodeFragment = self.coder.decode(Y, initialUnkList)
            #Verify?
            vStatus = 2
            if config.cfVerifyPrediction:
                # Apply fix in source code file
                extractor.applyFix(fixCodeFragment)
                extractor.saveToFile(allData.getFile())
                # Run CodeChecker and analyze code
                self.codeChecker.check(True)
                resolvedIds = self.getDiffResolvedIds()
                # Check if ID is resolved in tmp folder
                isFixed = i in resolvedIds
                # Set vStatus accordingly
                if isFixed:
                    vStatus = 1
                else:
                    vStatus = 0
            #Print
            print("Bug #{0} - summary".format(i))
            print("== Code fragment with bug ==")
            print(bugCodeFragment)
            print("== Suggested fix ==")
            print(fixCodeFragment)
            print("Verification: {0}".format(vLabels[vStatus]))
            # Keep asking until the answer is exactly 'y' or 'n'.
            a = ' '
            while a != 'y' and a != 'n':
                a = input("Apply fix? (y/n): ")
            if a == 'y':
                # With verification on, the fix was already written to the
                # file above, so it is written here only when it was skipped.
                if not config.cfVerifyPrediction:
                    # Apply fix in source code file
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(allData.getFile())
            elif config.cfVerifyPrediction:
                # Revert file contents
                self.vcs.checkout(self.commits[self.currentCommitIndex])
            print('Done')
    print("All done, exiting...")
def runCoderNormal(cfgFile):
    """Run a Coder built from ``coderNormal`` on ``cfgFile``."""
    Coder(coderNormal).run(cfgFile)
return W, Z, residual if __name__ == '__main__': np.set_printoptions(linewidth=200, formatter={'float': lambda x: '% .2f' % x}) N = 8 PAD = 0.05 from activator import * act = tanh_activator(PAD, N) # act = logistic_activator(PAD, N) c = Coder(act) g = Layer("gates", N, act, c) input_layers = { name: Layer(name, N, act, c) for name in ["gates", "op1", "op2"] } s = Sequencer(g, input_layers) v_old = g.coder.encode("SET") # s.add_transit(new_state="SET") for to_layer in ["FEF", "SC"]: for from_layer in ["FEF", "SC"]: v_new = s.add_transit(new_state=to_layer + from_layer, gates=v_old, op1=to_layer, op2=from_layer)
def main(self):
    """Analyse the code, suggest fixes for new bugs and ask how to proceed.

    Flow: run the checker, collect the new bugs, predict a fix for each one
    (optionally verifying it by re-running the checker), then prompt on
    stdin whether to apply the suggestions and whether to continue.

    Returns early when no new bug is found.  Calls ``sys.exit(1)`` when the
    user answers ``q`` at either prompt.
    """
    # Do analysis
    shutil.rmtree(config.getTmpDir())
    self.codeChecker.check(True)
    # Diff new
    newBugs = self.getDiffNew()
    if len(newBugs) < 1:
        print('No new bugs introduced, commit is accepted!')
        return
    print("New bugs found! Count: {0}. Attempting repairs...".format(len(newBugs)))
    # Load models
    models = {}
    for checker in globals.availableCheckers:
        models[checker] = load_model(config.cfModelFilenameFormat.format(checker))
    # Load all content from files having new
    files = set([self.convertFilePathToRepoRelativePath(x.getFile()) for x in newBugs])
    fileContents = {}
    for f in files:
        fn = config.getRepoDir() + f
        with open(fn, 'r') as fh:
            fileContents[f] = ''.join(fh.readlines())
    # For each file sort by bug line desc
    suggestions = []
    validSuggestions = 0
    for f in files:
        bugs = [x for x in newBugs if self.convertFilePathToRepoRelativePath(x.getFile()) == f]
        bugs.sort(key=lambda x: x.getLine(), reverse=True)
        print("=== File: {0} ===".format(f))
        # For each bug get a suggestion and test it
        for b in bugs:
            print("L{0}, Type: {1}".format(b.getLine(), b.getChecker()))
            # Prepare useful data
            dictionary = Dictionary(b.getChecker())
            coder = Coder(dictionary)
            totalDictionaryLength = dictionary.length()
            # Prepare and extract bug fragment
            checkerInfo = self.checkers.extractTokensForChecker(b.getChecker(), b.getMessage())
            extractor = CodeExtractor(b)
            extractor.loadCodeFromText(fileContents[f])
            extractor.extractBugCode()
            bugCodeFragment = extractor.getBugCodeFragment()
            fixCodeFragment = ''
            # Encode it
            encodedBugData, initialUnkList = coder.encode(bugCodeFragment, checkerData = checkerInfo)
            # Convert to one-hot
            MODEL_X_MAX_LEN = models[b.getChecker()].get_layer(index = 0).input_shape[1]
            if len(encodedBugData) > MODEL_X_MAX_LEN:
                # No suggestion is recorded for this bug.
                print("Ignored: Code too big for model")
                continue
            noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
            if noZerosToPad > 0:
                encodedBugData = coder.applyPadding(encodedBugData, noZerosToPad)
            X = np.zeros((1, MODEL_X_MAX_LEN, totalDictionaryLength))
            X[0] = coder.convertToOneHot(encodedBugData, np.zeros((MODEL_X_MAX_LEN, totalDictionaryLength)))
            # Predict and convert from one-hot
            Y = coder.convertFromOneHot(models[b.getChecker()].predict(X)[0])
            Y = coder.removePadding(Y)
            # Decode
            # NOTE(review): the last character of the decoded text is dropped
            # here - presumably a trailing terminator; confirm.
            fixCodeFragment = coder.decode(Y, initialUnkList)[:-1]
            #Verify?
            # vStatus: 0 = rejected, 1 = verified, 2 = verification skipped.
            vStatus = 2
            if config.cfVerifyPrediction:
                # Apply fix in source code file
                extractor.applyFix(fixCodeFragment)
                extractor.saveToFile(b.getFile())
                # Run CodeChecker and analyze code
                shutil.rmtree(config.getTmpDir())
                compilationLog = self.codeChecker.check(True)
                newBugsAfterFix = self.getDiffNew()
                # Check if ID is resolved in tmp folder
                isFixed = 'Build failed' not in compilationLog
                for nb in newBugsAfterFix:
                    if self.isBugDataEqual(b, nb):
                        isFixed = False
                # Set vStatus accordingly
                if isFixed:
                    vStatus = 1
                else:
                    vStatus = 0
                # Revert file
                extractor.loadCodeFromText(fileContents[f])
                extractor.saveToFile(b.getFile())
            if vStatus == 0:
                print("Verification: Negative, cannot be applied")
            elif vStatus == 1:
                print("Verification: Positive, can be applied")
                validSuggestions += 1
            elif vStatus == 2:
                # Unverified suggestions count as valid too.
                print("Verification: Skipped")
                validSuggestions += 1
            sugg = SuggestionData(f, b, bugCodeFragment, fixCodeFragment, vStatus)
            suggestions.append(sugg)
    print("Valid suggestions prepared for {0} / {1} bugs.".format(validSuggestions, len(newBugs)))
    if validSuggestions > 0:
        print("Apply valid suggestions (a), display them (d), ignore them (i) or abort commit (q)?")
        apply = False
        choice = True
        # Read single characters until a recognised answer arrives.
        while choice:
            c = sys.stdin.read(1)
            if c == 'a':
                apply = True
                choice = False
                print("Applying fixes...")
            elif c == 'i':
                choice = False
                print("Fixes ignored...")
            elif c == 'd':
                self.displaySuggestions(suggestions)
                print("Apply valid suggestions (a), ignore them (i) or abort commit (q)?")
            elif c == 'q':
                print("Aborting commit...")
                sys.exit(1)
        if apply:
            self.applyValidFixes(suggestions, files)
            print("Fixes applied!")
    if validSuggestions != len(newBugs):
        print("Unable to fix all bugs, continue with commit (c) or abort (q)?")
        choice = True
        while choice:
            c = sys.stdin.read(1)
            if c == 'c':
                choice = False
                print("Continuing...")
            elif c == 'q':
                print("Aborting commit...")
                sys.exit(1)
    else:
        print("Bugs corrected, commit is good to go!")
def build(self, checker, startK, startBatch):
    """Train (or resume training of) the model for ``checker``.

    Args:
        checker: checker name; selects the dictionary, the training file and
            the checkpoint file names.
        startK: first epoch to run.
        startBatch: record offset at which the first epoch starts.  When
            both values are 0 a new model is built; otherwise a checkpoint
            file is loaded.

    Checkpoints are saved after every epoch and, within an epoch, each time
    at least ``batchSaveThreshold`` records have been processed.  Returns
    early when the training file is empty.
    """
    # Initialize coder
    print("Initializing coder...")
    self.dictionary = Dictionary(checker)
    self.coder = Coder(self.dictionary)
    self.totalDictionaryLength = self.dictionary.length(
    )  # + globals.firstAvailableToken
    # Load training data from file
    print("Loading training data...")
    data = []
    with open(config.cfTrainFilenameFormat.format(checker), "r") as f:
        data = f.readlines()
    random.shuffle(data)
    dataLen = len(data)
    print("Done, fetched {0} records".format(dataLen))
    if dataLen < 1:
        print("No data found")
        return
    # Json load
    print("Converting to objects...")
    self.X = []
    self.Y = []
    self.ObjInd = 0
    self.ObjMax = dataLen
    xMaxLen = 0
    yMaxLen = 0
    for record in data:
        # The slice drops the last character of the line.
        # NOTE(review): presumably the trailing newline - a final line
        # without one would lose its closing brace; confirm.
        obj = json.loads(record[:-1])
        self.X.append(obj['x'])
        self.Y.append(obj['y'])
        if len(obj['x']) > xMaxLen:
            xMaxLen = len(obj['x'])
        if len(obj['y']) > yMaxLen:
            yMaxLen = len(obj['y'])
    # Padding
    print("Counted input and output lengths (X = {0}, Y = {1})...".format(
        xMaxLen, yMaxLen))
    # Preparing model
    print("Preparing model...")
    batchSaveIndex = 0
    batchSaveCounter = 0
    batchSaveThreshold = 10000
    if startK == 0 and startBatch == 0:
        # Fresh model.
        model = Sequential()
        model.add(
            LSTM(config.cfTrainHiddenSize,
                 input_shape=(xMaxLen, self.totalDictionaryLength)))
        model.add(RepeatVector(yMaxLen))
        for _ in range(config.cfTrainNumLayers):
            model.add(LSTM(config.cfTrainHiddenSize, return_sequences=True))
        model.add(TimeDistributed(Dense(self.totalDictionaryLength)))
        model.add(Activation('softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])
    else:
        # Resume: default to the checkpoint of the previous epoch, or to the
        # in-epoch checkpoint when a batch offset is given.
        modelFormat = 'checkpoint_epoch_{0}.{1}.h5'.format(
            startK - 1, checker)
        if startBatch > 0:
            batchSaveIndex = int(startBatch / batchSaveThreshold)
            modelFormat = 'checkpoint_epoch_b{2}.{0}.{1}.h5'.format(
                startK, checker, batchSaveIndex - 1)
        model = load_model(modelFormat)
    # The two string literals below hold disabled code and are never executed.
    '''
    print("Converting data...")
    X_s = np.zeros((dataLen, xMaxLen, self.totalDictionaryLength))
    Y_s = np.zeros((dataLen, yMaxLen, self.totalDictionaryLength))
    for j in range(dataLen):
        valueX = X[j]
        noZerosToPad = xMaxLen - len(valueX)
        if noZerosToPad > 0:
            valueX = self.coder.applyPadding(valueX, noZerosToPad)
        valueY = Y[j]
        noZerosToPad = yMaxLen - len(valueY)
        if noZerosToPad > 0:
            valueY = self.coder.applyPadding(valueY, noZerosToPad)
        self.coder.convertToOneHot(valueX, X_s[j])
        self.coder.convertToOneHot(valueY, Y_s[j])
    '''
    # Training model
    '''
    print("Training...")
    for k in range(startK, config.cfTrainNoEpochs):
        #model.fit(X_s, Y_s, epochs=1)#, validation_split=0.2)
        model.fit(self.datagen(config.cfTrainBatchSize, xMaxLen, yMaxLen), epochs=1, steps_per_epoch=103)#, validation_split=0.2)
        model.save('checkpoint_epoch_{0}.{1}.h5'.format(k, checker))
    '''
    #"""
    print("Training model...")
    for k in range(startK, config.cfTrainNoEpochs):
        i = 0
        model.reset_metrics()
        # Only the first epoch of this run starts at the requested offset.
        if k == startK:
            i = startBatch
        while i < dataLen:
            end = i + config.cfTrainBatchSize
            if end > dataLen:
                end = dataLen
            #'''
            # One-hot tensors are built per batch rather than for all data.
            X_s = np.zeros((end - i, xMaxLen, self.totalDictionaryLength))
            Y_s = np.zeros((end - i, yMaxLen, self.totalDictionaryLength))
            for j in range(i, end):
                valueX = self.X[j]
                noZerosToPad = xMaxLen - len(
                    valueX)  #int((xMaxLen - len(valueX)) / 2)
                if noZerosToPad > 0:
                    valueX = self.coder.applyPadding(valueX, noZerosToPad)
                valueY = self.Y[j]
                noZerosToPad = yMaxLen - len(
                    valueY)  #int((yMaxLen - len(valueY)) / 2)
                if noZerosToPad > 0:
                    valueY = self.coder.applyPadding(valueY, noZerosToPad)
                zerosX = np.zeros((xMaxLen, self.totalDictionaryLength))
                zerosY = np.zeros((yMaxLen, self.totalDictionaryLength))
                X_s[j - i] = self.coder.convertToOneHot(valueX, zerosX)
                Y_s[j - i] = self.coder.convertToOneHot(valueY, zerosY)
            result = model.train_on_batch(X_s, Y_s, reset_metrics=False)
            #'''
            #result = model.train_on_batch(X_s[i:end], Y_s[i:end])
            #del X_s
            #del Y_s
            print(
                "[{2}] Done batch {0}-{1} (loss: {3:.3f}, accuracy: {4:.3f})"
                .format(i, end, k, result[0], result[1]))
            i += config.cfTrainBatchSize
            batchSaveCounter += config.cfTrainBatchSize
            if batchSaveCounter >= batchSaveThreshold:
                # In-epoch checkpoint.
                batchSaveCounter = 0
                model.save('checkpoint_epoch_b{2}.{0}.{1}.h5'.format(
                    k, checker, batchSaveIndex))
                batchSaveIndex += 1
        # End-of-epoch checkpoint; the in-epoch counters start over.
        model.save('checkpoint_epoch_{0}.{1}.h5'.format(k, checker))
        batchSaveIndex = 0
        batchSaveCounter = 0
    #"""
    print("All done, exiting...")
def __init__(self, nvmnet, opdef, arg_regs, res_regs, op_reg):
    """Build the gate layers and connection descriptions for one operator.

    Args:
        nvmnet: network providing ``pad`` and the ``layers`` mapping.
        opdef: operator definition; its ``op_name``, ``operations``,
            ``in_ops``, ``out_ops`` and ``tokens`` are copied.
        arg_regs: list of argument register names.
        res_regs: list of result register names.
        op_reg: name of the register holding the operator.

    After construction the instance exposes ``structure`` (layer
    descriptions), ``connections`` / ``conn_names`` (connection
    descriptions) and ``layer_map`` (layer name to ``Layer``).
    """
    self.opdef = opdef
    self.op_name = opdef.op_name
    # Copies, so later changes do not touch the definition's containers.
    self.operations = dict(opdef.operations)
    self.in_ops = list(opdef.in_ops)
    self.out_ops = list(opdef.out_ops)
    self.tokens = list(opdef.tokens)
    self.arg_registers = arg_regs
    self.res_registers = res_regs
    self.op_register = op_reg
    self.hidden_name = "%s_gh" % self.op_name
    self.gate_name = "%s_go" % self.op_name
    # 1. OP->HID, 2. OP->OP, [3. RESX->RESX, 4. RESX->RESY for op in ops]
    self.gate_map = GateMap([(self.hidden_name, self.op_register, "u"),
                             (self.op_register, self.op_register, "u")] +
                            [("res", "res", op) for op in self.operations] +
                            [("res", "arg", op) for op in self.operations])
    # Hidden gate layer
    N = 16
    self.hidden_size = N**2
    hid_activator = tanh_activator(nvmnet.pad, self.hidden_size)
    self.hidden_layer = Layer(self.hidden_name, (N, N), hid_activator,
                              Coder(hid_activator))
    # Gate output layer
    # One unit per entry of the gate map.
    self.gate_size = self.gate_map.get_gate_count()
    gate_activator = heaviside_activator(self.gate_size)
    self.gate_layer = Layer(self.gate_name, (self.gate_size, 1),
                            gate_activator, Coder(gate_activator))
    # Gate layer (detects operator)
    hidden_gate_layer = {
        "name": self.hidden_name,
        "neural model": "nvm",
        "rows": 1,
        "columns": self.hidden_size,
    }
    gate_layer = {
        "name": self.gate_name,
        "neural model": "nvm_heaviside",
        "rows": 1,
        "columns": self.gate_size,
    }
    self.structure = {
        "name": self.op_name,
        "type": "parallel",
        "layers": [hidden_gate_layer, gate_layer]
    }

    # Make gate connection
    # Connects the single gate column `index` to `to_name`.
    def build_gate(to_name, index, suffix=""):
        return {
            "name": get_conn_name(to_name, self.gate_name, suffix),
            "from layer": self.gate_name,
            "to layer": to_name,
            "type": "subset",
            "opcode": "add",
            "subset config": {
                "from row end": 1,
                "from column start": index,
                "from column end": index + 1,
                "to row end": 1,
                "to column end": 1,
            },
            "plastic": False,
            "gate": True,
        }

    # Squash weights to cancel gain
    def build_squash(to_name, suffix="", gated=True):
        return {
            "name": get_conn_name(to_name, to_name, suffix),
            "from layer": "bias",
            "to layer": to_name,
            "type": "fully connected",
            "opcode": "add",
            "plastic": False,
            "gated": gated,
        }

    # Make weight/bias connections
    # Returns a pair: the weight connection ("-w") and the bias one ("-b").
    def build_conns(to_name, from_name, suffix="", gated=True):
        return [{
            "name": get_conn_name(to_name, from_name, suffix + "-w"),
            "from layer": from_name,
            "to layer": to_name,
            "type": "fully connected",
            "opcode": "add",
            "plastic": False,
            "gated": gated
        }, {
            "name": get_conn_name(to_name, from_name, suffix + "-b"),
            "from layer": 'bias',
            "to layer": to_name,
            "type": "fully connected",
            "opcode": "add",
            "plastic": False,
            "gated": gated
        }]

    self.connections = []
    # Hidden gate input
    self.connections.append(
        build_gate(
            self.hidden_name,
            self.gate_map.get_gate_index(
                (self.hidden_name, self.op_register, "u"))))
    self.connections += build_conns(self.hidden_name,
                                    self.op_register,
                                    gated=True)
    # Hidden gate recurrence
    self.connections += build_conns(self.hidden_name,
                                    self.hidden_name,
                                    gated=False)
    # Gate activation
    self.connections += build_conns(self.gate_name,
                                    self.hidden_name,
                                    gated=False)
    # Operation squash
    self.connections.append(
        build_gate(
            self.op_register,
            self.gate_map.get_gate_index(
                (self.op_register, self.op_register, "u")), self.op_name))
    self.connections.append(
        build_squash(self.op_register, suffix=self.op_name + "-squash"))
    for op in self.operations:
        for to_name in self.res_registers:
            # Recurrent connections
            self.connections.append(
                build_gate(
                    to_name,
                    self.gate_map.get_gate_index(("res", "res", op)),
                    op + "-1"))
            self.connections += build_conns(to_name,
                                            to_name,
                                            suffix=op,
                                            gated=True)
            # Inter-layer connections
            self.connections.append(
                build_gate(
                    to_name,
                    self.gate_map.get_gate_index(("res", "arg", op)),
                    op + "-2"))
            for from_name in self.arg_registers:
                # A register's self-connection was already added above.
                if to_name != from_name:
                    self.connections += build_conns(to_name,
                                                    from_name,
                                                    suffix=op,
                                                    gated=True)
    # Existing layers come from the network; the two gate layers are new.
    self.layer_map = {
        name: nvmnet.layers[name]
        for name in self.arg_registers + self.res_registers +
        [self.op_register]
    }
    self.layer_map[self.gate_name] = self.gate_layer
    self.layer_map[self.hidden_name] = self.hidden_layer
    self.conn_names = tuple(conn["name"] for conn in self.connections)
# 3., 4., 5. model1 = load_model(config.cfModelFilenameFormat.format('deadcode.DeadStores')) model2 = load_model( config.cfModelFilenameFormat.format( 'clang-diagnostic-tautological-constant-out-of-range-compare')) model3 = load_model( config.cfModelFilenameFormat.format('clang-diagnostic-unused-parameter')) model4 = load_model( config.cfModelFilenameFormat.format( 'clang-diagnostic-constant-conversion')) dictionary1 = Dictionary('deadcode.DeadStores') dictionary2 = Dictionary( 'clang-diagnostic-tautological-constant-out-of-range-compare') dictionary3 = Dictionary('clang-diagnostic-unused-parameter') dictionary4 = Dictionary('clang-diagnostic-constant-conversion') coder1 = Coder(dictionary1) coder2 = Coder(dictionary2) coder3 = Coder(dictionary3) coder4 = Coder(dictionary4) totalDictionaryLength1 = dictionary1.length() totalDictionaryLength2 = dictionary2.length() totalDictionaryLength3 = dictionary3.length() totalDictionaryLength4 = dictionary4.length() def ProcessBugsInFile(fileName): # 5.1. # 5.2. for bug in bugsPerFile[fileName]: bugData = bugDataList[bug] cleanFn = fileName[:-4]
def __init__(self, N, mask_frac, conv=1., stabil=10):
    """Build the network, its gate layer, and the activators and coders.

    Args:
        N: requested side length; divided by ``conv`` and truncated.
        mask_frac: divisor applied to the layer size to obtain ``mask_size``.
        conv: forwarded to ``build_unit``.
        stabil: stored unchanged on the instance.
    """
    N = int(N / conv)
    pad = 0.0001

    self.stabil = stabil
    self.mask_frac = mask_frac
    self.size = N**2
    self.mask_size = int(self.size / self.mask_frac)

    # Build mem/ptr/ctx unit
    self.prefix = "test"
    layer_configs, connections = build_unit(self.prefix, N, "graph_net",
                                            conv, pad)

    # Assign gate indices
    self.gate_layer_name = "g"
    self.gates = {}
    for conn in connections:
        controlled = (conn.get("gate", False) or conn.get("decay", False)
                      or conn.get("learning", False))
        if not controlled:
            continue
        # Each controlled connection reads one column of the gate layer.
        gate_index = len(self.gates)
        conn["from layer"] = self.gate_layer_name
        subset = conn["subset config"]
        subset["from column start"] = gate_index
        subset["from column end"] = gate_index + 1
        self.gates[conn["name"]] = gate_index

    # Build gate layer
    gate_config = build_layer(self.gate_layer_name, "nvm_heaviside", 1,
                              len(self.gates), pad)
    layer_configs.append(gate_config)

    self.net = Network({
        "structures": [{
            "name": "graph_net",
            "type": "parallel",
            "layers": layer_configs
        }],
        "connections": connections
    })

    ### Create activators and coders
    self.act = tanh_activator(pad, self.size)
    self.act_h = heaviside_activator(self.size)

    self.layer_names = [
        self.prefix + "m", self.prefix + "p", self.prefix + "c",
        self.gate_layer_name
    ]
    # Activators in the same order as ``layer_names``.
    activators = [self.act, self.act, self.act_h, self.act_h]
    self.acts = {}
    self.coders = {}
    for layer_name, activator in zip(self.layer_names, activators):
        self.acts[layer_name] = activator
    for layer_name, activator in zip(self.layer_names, activators):
        self.coders[layer_name] = Coder(activator)
def runCoder(cfgFile):
    """Create a ``Coder`` from ``coderNormal`` and run it on ``cfgFile``."""
    Coder(coderNormal).run(cfgFile)
if __name__ == '__main__': np.set_printoptions(linewidth=200, formatter={'float': lambda x: '% .2f' % x}) N = 8 PAD = 0.05 from activator import * # act_fun = tanh_activator act_fun = logistic_activator act = act_fun(PAD, N) coder = Coder(act) layer_names = ['mem', 'ip', 'opc', 'op1', 'op2', 'op3'] layers = [Layer(name, N, act, coder) for name in layer_names] NL = len(layers) + 2 # +2 for gate out/hidden NG = NL**2 + NL NH = 100 actg = heaviside_activator(NG) acth = act_fun(PAD, NH) gate_output = Layer('gates', NG, actg, Coder(actg)) gate_hidden = Layer('ghide', NH, acth, Coder(acth)) layers.extend([gate_hidden, gate_output]) gate_map = gm.make_nvm_gate_map([layer.name for layer in layers]) gs = GateSequencer(gate_map, gate_output, gate_hidden,
from coder import Coder, MorseCoder
import string

if __name__ == "__main__":
    # Read lines with raw_input on Python 2 and input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # By default, the translator will encode text by switching it to uppercase
    translator = Coder(string.ascii_lowercase, string.ascii_uppercase)
    while True:
        try:
            line = read_line()
        except EOFError:
            # End of input (Ctrl-D or an exhausted pipe): stop cleanly
            # instead of dying with a traceback.
            break
        coded = translator.encode(line)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(coded)
        print(translator.decode(coded))
def runCoderHeuristic(cfgFile):
    """Create a ``Coder`` from ``coderHeuristic`` and run it on ``cfgFile``."""
    Coder(coderHeuristic).run(cfgFile)
def _fsm_settle(y, w_r, act, max_iters):
    """Apply the recurrent weights until ``y`` stops changing or ``max_iters`` is reached."""
    for _ in range(max_iters):
        old_y = y
        y = act.f(w_r.dot(y))
        if np.array_equal(y, old_y):
            break
    return y


def _fsm_transition_credit(fsm_layer, y, end, size):
    """Return ``(exact, credit)`` for a settled pattern ``y`` and expected state ``end``.

    ``credit`` is 1.0 on an exact decode, otherwise the fraction of the
    ``size`` units whose sign agrees with the encoding of ``end``.
    """
    if fsm_layer.coder.decode(y) == end:
        return True, 1.0
    target = fsm_layer.coder.encode(end)
    # Count matching entries directly: len(np.where(cond)) would return the
    # number of array dimensions, not the number of matches.
    matches = np.count_nonzero(np.sign(y) == np.sign(target))
    return False, float(matches) / size


def test(N, pad, mask_frac, mappings, stabil=5):
    """Compare two ways of learning a finite-state machine and report accuracies.

    The "old" method learns one weight matrix over the concatenated
    (input, current state) pattern. The "new" method learns a single
    input-to-state matrix that is gated by a per-state mask, where the mask
    itself is produced from the current state by a learned matrix.

    Parameters
    ----------
    N : layers have shape ``(N, N)``, i.e. ``N**2`` units.
    pad : padding value handed to the activators.
    mask_frac : each mask entry is kept with probability ``1 / mask_frac``;
        masked weight updates are scaled by ``mask_frac``.
    mappings : dict mapping a start state to a re-iterable collection of
        ``(input, end_state)`` pairs.
    stabil : number of recurrent settling iterations (the new method uses
        ``stabil + 1``).

    Returns
    -------
    dict with keys ``old_acc``, ``new_acc``, ``weighted_old_acc`` and
    ``weighted_new_acc``.

    Raises
    ------
    ZeroDivisionError if ``mappings`` contains no transitions.
    """
    # list(...) around keys() keeps this working on Python 3 dict views.
    # NOTE(review): a state that is both a start and an end state appears
    # twice in fsm_states, as in the original -- confirm that is intended.
    fsm_states = list(mappings.keys()) + list(
        set(v for m in mappings.values() for k, v in m))
    input_states = list(set(k for m in mappings.values() for k, v in m))

    shape = (N, N)
    size = N**2

    act = tanh_activator(pad, size)
    act_log = logistic_activator(pad, size)

    ########### OLD METHOD ###################
    input_layer, fsm_layer = (Layer(k, shape, act, Coder(act)) for k in "ab")
    input_layer.encode_tokens(input_states, orthogonal=True)
    fsm_layer.encode_tokens(fsm_states, orthogonal=True)

    # Learn recurrent weights
    w_r = np.zeros((size, size))
    b = np.zeros((size, 1))
    X = fsm_layer.encode_tokens(fsm_states)
    dw, _ = rehebbian(w_r, b, X, X, act, act)
    w_r = w_r + dw

    # Learn inter-regional weights over the stacked (input, state) pattern
    w = np.zeros((size, size * 2))
    b = np.zeros((size, 1))
    for state, transitions in mappings.items():
        X = input_layer.encode_tokens([k for k, v in transitions])
        state_cols = np.repeat(
            fsm_layer.coder.encode(state), X.shape[1], axis=1)
        X = np.concatenate((X, state_cols), axis=0)
        Y = fsm_layer.encode_tokens([v for k, v in transitions])
        dw, _ = rehebbian(w, b, X, Y, act, act)
        w = w + dw

    # Test
    correct = 0
    weighted = 0.
    total = 0
    for start, transitions in mappings.items():
        start_code = fsm_layer.coder.encode(start)
        for inp, end in transitions:
            x = np.concatenate(
                (input_layer.coder.encode(inp), start_code), axis=0)
            y = _fsm_settle(act.f(w.dot(x)), w_r, act, stabil)
            exact, credit = _fsm_transition_credit(fsm_layer, y, end, size)
            if exact:
                correct += 1
            weighted += credit
            total += 1

    old_acc = float(correct) / total
    weighted_old_acc = weighted / total

    ########### NEW METHOD ###################
    input_layer, fsm_layer = (Layer(k, shape, act, Coder(act)) for k in "ab")
    input_layer.encode_tokens(input_states, orthogonal=False)
    fsm_layer.encode_tokens(fsm_states, orthogonal=False)

    # Create gating masks for each state.
    # Builtin float replaces the np.float alias removed in NumPy 1.24.
    w_masks = {
        s: (np.random.random((size, size)) < (1. / mask_frac)).astype(float)
        for s in fsm_states
    }

    # Ensure nonzero masks
    for mask in w_masks.values():
        if np.sum(mask) == 0:
            mask[randint(0, mask.shape[0] - 1),
                 randint(0, mask.shape[1] - 1)] = 1.

    # Learn the state -> flattened mask mapping
    w_m = np.zeros((size**2, size))
    b = np.zeros((size**2, 1))
    X = fsm_layer.encode_tokens(fsm_states)
    Y = np.concatenate(
        tuple(w_masks[s].reshape(-1, 1) for s in fsm_states), axis=1)
    dw, _ = rehebbian(w_m, b, X, Y, act, act)
    w_m = w_m + dw

    # Learn recurrent weights
    w_r = np.zeros((size, size))
    b = np.zeros((size, 1))
    X = fsm_layer.encode_tokens(fsm_states)
    dw, _ = rehebbian(w_r, b, X, X, act, act)
    w_r = w_r + dw

    # Learn inter-regional weights, restricted to the start state's mask
    w = np.zeros((size, size))
    b = np.zeros((size, 1))
    for start, transitions in mappings.items():
        X = input_layer.encode_tokens([k for k, v in transitions])
        Y = fsm_layer.encode_tokens([v for k, v in transitions])
        w_mask = w_masks[start]
        dw, _ = rehebbian(np.multiply(w, w_mask), b, X, Y, act, act)
        w = w + (np.multiply(dw, w_mask) * mask_frac)

    # Test
    correct = 0
    weighted = 0.
    total = 0
    for start, transitions in mappings.items():
        # Gate the shared weights with the mask predicted from the start state.
        start_code = fsm_layer.coder.encode(start)
        predicted_mask = act_log.f(w_m.dot(start_code)).reshape(size, size)
        w_masked = np.multiply(w, predicted_mask)

        for inp, end in transitions:
            x = input_layer.coder.encode(inp)
            # NOTE(review): one more settling step than the old method
            # (stabil + 1), kept from the original -- confirm intent.
            y = _fsm_settle(act.f(w_masked.dot(x)), w_r, act, stabil + 1)
            exact, credit = _fsm_transition_credit(fsm_layer, y, end, size)
            if exact:
                correct += 1
            weighted += credit
            total += 1

    new_acc = float(correct) / total
    weighted_new_acc = weighted / total

    return {
        "old_acc": old_acc,
        "new_acc": new_acc,
        "weighted_old_acc": weighted_old_acc,
        "weighted_new_acc": weighted_new_acc
    }
"""
Demo script for the simple stenography scheme: a fixed sample text is
encoded with ``Coder`` and then read back with ``Decoder`` and printed.

:Author: Manthan C S
:GitHub: mnthnx64
"""

from coder import Coder
from decoder import Decoder

if __name__ == '__main__':
    # Sample message, one literal per sentence; adjacent string literals are
    # joined by the compiler, so the resulting text is a single string.
    message = (
        "In all the examples so far, the elements of a are provided by the iterator one at a time, because all the looping logic is internal to the iterator. "
        "While this is simple and convenient, it is not very efficient. "
        "A better approach is to move the one-dimensional innermost loop into your code, external to the iterator. "
        "This way, NumPy’s vectorized operations can be used on larger chunks of the elements being visited."
    )

    encoder = Coder(message)
    encoder.encode()

    # The decoder is only created once encoding has finished.
    print(Decoder().decode())
def generate_train_data(args):
    """
    Generates train data for the region provided through arguments.

    Parameters
    ----------
    args : tuple ``(reads_path, truth_genome_path, ref, region)`` where
        reads_path : path to the aligned reads file
        truth_genome_path : path to the truth genome
        ref : reference sequence
        region : region for which data is required (must expose ``name``,
            ``start`` and ``end``)

    Returns
    -------
    ``None`` when no alignment survives filtering, otherwise the tuple
    region_name : region name
    positions : positions corresponding provided region
    examples : examples corresponding provided region
    labels : labels corresponding provided region

    Raises
    ------
    KeyError : a position with ``p[1] == 0`` has no label mapping.
    AssertionError : a feature position lies outside the filtered alignments.
    """
    reads_path, truth_genome_path, ref, region = args
    region_label = f'{region.name}:{region.start}-{region.end}'

    aligns = get_aligns(truth_genome_path, region)
    filtered_aligns = filter_aligns(aligns)

    print(f'>> finished generating labels for {region_label}')

    if not filtered_aligns:
        print('>> no alignments')
        return None

    # Loop-invariant: encode the "unknown base" marker once.
    unknown_label = Coder.encode(Coder.UNKNOWN)

    positions = []
    examples = []
    labels = []

    for align in filtered_aligns:
        position_label_dict = {}
        positions_with_unknown_base = set()

        pos, lbls = get_postions_and_labels(align, ref, region)
        for position, label in zip(pos, lbls):
            if label == unknown_label:
                positions_with_unknown_base.add(position)
            else:
                position_label_dict[position] = label

        if not position_label_dict:
            # No labelled position in this alignment (all unknown or none
            # returned): there is no span to build features for, and
            # indexing sorted_positions[0] below would raise IndexError.
            continue

        sorted_positions = sorted(position_label_dict)
        region_string = f'{region.name}:{sorted_positions[0][0] + 1}-{sorted_positions[-1][0]}'

        result = gen.generate_features(reads_path, str(ref), region_string)

        for P, X in zip(*result):
            Y = []
            for p in P:
                assert is_in_region(p[0], filtered_aligns)

                if p in positions_with_unknown_base:
                    # A window touching an unknown base is dropped entirely.
                    break

                if p in position_label_dict:
                    y_label = position_label_dict[p]
                elif p[1] != 0:
                    # Unlabelled position with a non-zero second component
                    # is labelled as a gap.
                    y_label = Coder.encode(Coder.GAP)
                else:
                    raise KeyError(f'error: No label mapping for position {p}!')

                Y.append(y_label)
            else:
                # Only reached when the window was not aborted by `break`.
                positions.append(P)
                examples.append(X)
                labels.append(Y)

    print(f'>> finished generating examples for {region_label}')
    return region.name, positions, examples, labels
def main():
    """Run the triple-code experiment, then plot mean BCH error against channel error probability.

    Stage 1 generates a random bit array, codes it with
    ``Coder().triple_code``, distorts it through ``Channel(0.15)``, decodes
    it and prints the error factor and the elapsed time.

    Stage 2 sends one random message through ``BCH.code`` for every
    ``(m, t)`` entry of ``bch_code_parameters`` at several error
    probabilities, averages the error factor per probability and plots it.
    """
    t_begin = time.time()

    source_bits = Generator().populate_array(2**18)
    coded_bits = Coder().triple_code(source_bits)
    noisy_bits = Channel(0.15).distort(coded_bits)
    recovered_bits = Decoder().decode(noisy_bits)
    print(error_factor(source_bits, recovered_bits))

    t_finish = time.time()
    print("Time:", t_finish - t_begin)

    # Stage II
    print("Stage 2")
    # Instance is not used below; BCH.code is invoked on the class itself.
    bch = BCH()
    probabilities = []
    mean_errors = []
    sent_msg = b_util.urandom(512)

    for percent in range(0, 20, 3):
        counter = 0
        error_sum = 0
        for m in bch_code_parameters:
            codes_for_m = bch_code_parameters[m]
            for t in codes_for_m:
                counter += 1
                received_msg = BCH.code(
                    sent_msg, m, t, codes_for_m[t], percent / 100)
                # Pad the sent message to the received length before comparing.
                filled_array = fill_with_zeros(sent_msg, len(received_msg))
                error_sum += error_factor(filled_array, received_msg)
        probabilities.append(percent / 100)
        mean_errors.append(error_sum / counter)
        print(counter)

    print(probabilities, mean_errors)
    plt.plot(probabilities, mean_errors)
    plt.title("Error percentage depending on the probability p")
    plt.xlabel("Probability of error p")
    plt.ylabel("Error factor in %")
    plt.show()