def multicode(reader):  # reader - [fname, contents]
    preproc_data = []
    result = []
    flag = 0
    x = len(reader)
    for i in range(x):
        data = preproc.preprocess(reader[i])
        preproc_data.append(data)
    combinations = list(itertools.combinations(preproc_data, 2))
    for combo in combinations:
        result.append(preproc.check(combo))
    sim_result = ""
    for i in range(len(result)):
        if result[i] == True:
            flag = 1
            sim_result += "</br><b>Suspicious programs:</b> <i>%s %s</i>" % (
                combinations[i][0][3], combinations[i][1][3])
            tok1 = preproc.replace_tokens(combinations[i][0][0])
            tok2 = preproc.replace_tokens(combinations[i][1][0])
            p1, p2 = greedy_string_tiling(tok1, tok2)
            sim_result += "</br><b>%d%s</b> of %s found similar to %s" % (
                p1, "%", combinations[i][0][3], combinations[i][1][3])
            sim_result += "</br><b>%d%s</b> of %s found similar to %s</br>" % (
                p2, "%", combinations[i][1][3], combinations[i][0][3])
    if flag == 0:
        sim_result += "</br>Nothing suspicious found..."
    sim_result += "</p>"
    return sim_result
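A minimal driver sketch, not from the source: it assumes each reader entry is a [fname, contents] pair (as the comment in multicode() indicates) and that the preproc, itertools and greedy_string_tiling names are already imported at module level; the glob pattern is hypothetical.

# Hypothetical usage of multicode(); the glob pattern and file handling are assumptions.
import glob

def build_reader(pattern="submissions/*.py"):
    reader = []
    for fname in glob.glob(pattern):
        with open(fname, "r", encoding="utf-8") as fh:
            reader.append([fname, fh.read()])  # [fname, contents], per the comment above
    return reader

if __name__ == "__main__":
    print(multicode(build_reader()))  # prints the HTML similarity report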
def preprocessLMDBDist(number):
    print('Starting with', number, 'th chunk')
    numTxt = ('0' if number < 10 else '') + str(number)
    envOri = lmdb.open(LMDB_ROOT_PATH + '\\dist' + numTxt, readonly=True)
    envPreproc = lmdb.open(LMDB_ROOT_PATH + '\\proc' + numTxt, map_size=map_size)

    datumOri = caffe.proto.caffe_pb2.Datum()
    datumPreproc = caffe.proto.caffe_pb2.Datum()
    datumPreproc.channels = 5
    datumPreproc.height = RESOLUTION
    datumPreproc.width = RESOLUTION

    curKey = None
    count = 0
    while True:
        with envOri.begin() as txnOri:
            cursor = txnOri.cursor()
            if curKey:
                # resume where the previous batch left off
                cursor.set_key(curKey)
            # read Ori datum
            buffer = []
            for i in range(BUF_SIZE):
                if not cursor.next():
                    break
                curKey = cursor.key()
                datumOri.ParseFromString(cursor.value())
                flat_x = np.fromstring(datumOri.data, dtype=np.uint8)
                x = flat_x.reshape(datumOri.channels, datumOri.height, datumOri.width)
                datumPreproc.data = preprocess(x).tobytes()  # or .tostring() if numpy < 1.9
                datumPreproc.label = datumOri.label
                buffer.append((curKey, datumPreproc.SerializeToString()))
                count += 1
                if count % 100 == 0:
                    print(count, datumOri.label, 'done')
        if len(buffer) == 0:
            break
        # write the preprocessed batch into the output LMDB
        with envPreproc.begin(write=True) as txnPreproc:
            txnPreproc.cursor().putmulti(buffer)
    print(number, 'th chunk done')
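A sketch of how preprocessLMDBDist() might be driven over all dist<NN> chunks; NUM_CHUNKS is an assumption, since the source does not show the call site.

# Hypothetical call site for preprocessLMDBDist(); NUM_CHUNKS is an assumption.
NUM_CHUNKS = 16  # assumed number of dist<NN>/proc<NN> LMDB chunk pairs
for chunk in range(NUM_CHUNKS):
    preprocessLMDBDist(chunk)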
# Fragment: runs inside a loop over crawled HTML files, with the open file
# handle f and its file_name bound by the enclosing loop.
content = f.read()
html = HTMLParser(content)
if html.body is None:
    continue

# drop script and style tags before extracting the page text
for tag in html.css('script'):
    tag.decompose()
for tag in html.css('style'):
    tag.decompose()
content = html.body.text(separator='\n')

text = tokenize_text(content)
content = preprocess(content)
content, snippets = index_words(content, text)

for word in content:
    num, i = content[word]
    cur.execute('INSERT OR IGNORE INTO IndexWord (word) VALUES (?);', (word, ))
    post = (word, file_name, num, i)
    cur.execute('INSERT INTO Posting VALUES (?,?,?,?);', post)
conn.commit()

for index in snippets:
    snippet = snippets[index]
import sys
import time
import sqlite3
import re
from selectolax.parser import HTMLParser
from preproc import preprocess, tokenize_text  # importing custom package

if len(sys.argv) == 1:
    print("ERROR: no arguments.")
    exit(0)
elif len(sys.argv) > 2:
    print("ERROR: too many arguments.")
    exit(0)

q = sys.argv[1]
query = preprocess(q)
query = [word for i, word in query]

print("Connecting to database...")
conn = sqlite3.connect('inverted-index.db')
cur = conn.cursor()

s = '''
    SELECT p.documentName AS docName,
           SUM(frequency) AS freq,
           GROUP_CONCAT(indexes) AS idxs
    FROM Posting p
    WHERE p.word IN ({l})
    GROUP BY p.documentName
    ORDER BY freq DESC;
'''
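A sketch, under assumptions, of how the {l} placeholder in the query template could be expanded and executed: one '?' per query word, bound as parameters. The source cuts off before this step.

# Hypothetical continuation: expand {l} into one '?' per query word and run the query.
placeholders = ','.join('?' for _ in query)
for doc_name, freq, idxs in cur.execute(s.format(l=placeholders), query):
    print(doc_name, freq, idxs)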
cfg.n_ensemble = 5

vec_idx_healthy = [1, 150]
vec_idx_dry_amd = [1, 150]
vec_idx_cnv = [1, 150]

vec_train_acc = []
vec_valid_acc = []
vec_test_acc = []
vec_y_true = []
vec_y_pred = []
vec_model = []

# Preprocessing
Xs, ys = preprocess(vec_idx_healthy, vec_idx_dry_amd, vec_idx_cnv, cfg)

for i in range(cfg.n_ensemble):
    print("\n\nIteration: {}".format(i + 1))

    model = get_model('arch_011', cfg)
    callbacks = get_callbacks(cfg)

    h = model.fit(Xs[0], ys[0],
                  batch_size=cfg.batch_size,
                  epochs=cfg.n_epoch,
                  verbose=2,
                  callbacks=callbacks,
                  validation_data=(Xs[1], ys[1]),
                  shuffle=False)
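A sketch of one way the trained models could be combined, assuming each fitted model is appended to vec_model, that Xs[2]/ys[2] hold the test split, and that labels are one-hot; the source does not show this step.

# Hypothetical ensemble evaluation: average class probabilities across models.
import numpy as np

probs = np.mean([m.predict(Xs[2]) for m in vec_model], axis=0)  # assumes vec_model is filled
y_pred = np.argmax(probs, axis=1)
test_acc = np.mean(y_pred == np.argmax(ys[2], axis=1))  # assumes one-hot ys[2]
print("Ensemble test accuracy: {:.4f}".format(test_acc))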
fp.close()

fooledCnt3ch = 0
fooledCnt5ch = 0
count = 0
bothlist = []
for imagePath, fname in allFiles(DATA_PATH):
    label = int(fname[9:].split('_')[0])
    if label > 200:
        continue

    # load image
    img = cv2.imread(imagePath)
    img = cv2.resize(img, (224, 224))
    img3ch = img.transpose((2, 0, 1))
    img5ch = preprocess(img3ch)

    # make batch
    batch3ch = np.zeros((NUM_IMAGE, *img3ch.shape)).astype(np.float32)
    batch5ch = np.zeros((NUM_IMAGE, *img5ch.shape)).astype(np.float32)
    batch3ch[0] = img3ch
    batch5ch[0] = img5ch

    # input, forward and get output
    net3ch.blobs['data'].data[...] = batch3ch
    net5ch.blobs['data'].data[...] = batch5ch
    out3ch = net3ch.forward()
    out5ch = net5ch.forward()
    probs3ch = out3ch['pool4'].reshape(1000)
    probs5ch = out5ch['pool4'].reshape(1000)
    answer3ch = probs3ch.argmax()
def predict(img):
    model_xml = r"C:\modeloptimizing\HTRModel.xml"
    model_bin = r"C:\modeloptimizing\HTRModel.bin"

    ie = IECore()
    net = IENetwork(model=model_xml, weights=model_bin)
    input_blob = next(iter(net.inputs))
    n, c, h, w = net.inputs[input_blob].shape
    exec_net = ie.load_network(network=net, device_name="CPU")

    input_size = (1024, 128, 1)
    img = preprocess(img, input_size=input_size)
    img = normalization([img])
    img = np.squeeze(img, axis=3)
    img = np.expand_dims(img, axis=0)

    start = timer()
    print("Starting inference...")
    res = exec_net.infer(inputs={input_blob: img})
    end = timer()
    print("End inference time: ", 1000 * (end - start))

    output_data = res['dense/BiasAdd/Softmax']
    print(output_data)

    steps_done = 0
    steps = 1
    batch_size = int(np.ceil(len(output_data) / steps))
    input_length = len(max(output_data, key=len))
    predicts, probabilities = [], []

    while steps_done < steps:
        index = steps_done * batch_size
        until = index + batch_size
        x_test = np.asarray(output_data[index:until])
        x_test_len = np.asarray([input_length for _ in range(len(x_test))])
        decode, log = K.ctc_decode(x_test, x_test_len, greedy=False, beam_width=10, top_paths=3)
        probabilities.extend([np.exp(x) for x in log])
        decode = [[[int(p) for p in x if p != -1] for x in y] for y in decode]
        predicts.extend(np.swapaxes(decode, 0, 1))
        steps_done += 1

    for p in predicts:
        print(str(p))
    for pb in probabilities:
        print(str(pb))

    # interpretation of the data
    max_text_length = 128
    charset_base = string.printable[:95]
    tokenizer = Tokenizer(chars=charset_base, max_text_length=max_text_length)
    predicts = [[tokenizer.decode(x) for x in y] for y in predicts]

    print("\n####################################")
    for i, (pred, prob) in enumerate(zip(predicts, probabilities)):
        print("\nProb. - Predict")
        for (pd, pb) in zip(pred, prob):
            print(f"{pb:.4f} - {pd}")
            if i == 0:
                pbperc = pb * 100
                pdfinal = pd
        i = 1 + i
    print("\n####################################")

    return pdfinal, pbperc
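A hypothetical call site for predict(); the image path and the grayscale load flag are assumptions, not part of the source.

# Hypothetical usage of predict(); path and grayscale flag are assumptions.
import cv2

image = cv2.imread(r"C:\modeloptimizing\sample_line.png", cv2.IMREAD_GRAYSCALE)
text, confidence = predict(image)
print(f"{confidence:.2f}% - {text}")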
# Python 2 fragment (print statement, dict.iteritems()); the usage message
# below is printed from an argument-count check in the enclosing code.
print 'Usage <file> <max-literals> [<max-number-of-properties>]'
sys.exit(1)

fn = args[0]
num = int(args[1])
num_generated = int(args[2]) if len(args) >= 3 else None

# Parse model
inf = open(fn, 'r')
model = nusmv_yacc.parser.parse(inf.read())
inf.close()

(variable_dict, unprimed_vars, definition_vars), \
    (definitions, transition_definitions), \
    (initials, xinitials, transitions, invariants,
     xinvariants, urgencies, resets, prop, xprop), \
    orgspecs, clockmax = preproc.preprocess(model, True, False, 0, True, False,
                                            get_original_def_info=True)

variables = []
clocks = []
for v, t in variable_dict.iteritems():
    if not v.endswith(preproc.PRIME_SUFFIX):
        if t == expressions.REAL:
            clocks.append(v)
        variables.append((v, t))

for n, t in preproc.g_list_of_original_definitions.iteritems():
    if not n.endswith(preproc.PRIME_SUFFIX):  # was v.endswith(...); fixed to check n
        if t == expressions.REAL:
            clocks.append(n)
        variables.append((n, t))
#!/usr/bin/env python3
from vid_crop import crop
from frame_by_frame import write_frames
from preproc import preprocess
import sys

# write_frames(sys.argv[1])
preprocess()
def sort_by_freq(e):
    doc, freq, snippet = e
    return freq


if len(sys.argv) == 1:
    print("ERROR: no arguments.")
    exit(0)
elif len(sys.argv) > 2:
    print("ERROR: too many arguments.")
    exit(0)

q = sys.argv[1]
query = preprocess(q)
query = [word for i, word in query]

dir_names = [
    "e-prostor.gov.si", "e-uprava.gov.si", "evem.gov.si", "podatki.gov.si"
]
dir_nums = [218, 60, 662, 564]
num_files = sum(dir_nums)
files_checked = 0

print("Executing query...")
start = time.time()
results = dict()