def pipeline(self, img):
    # Each detection is ordered in terms of priority: a detection higher on the
    # list will override a similar detection lower down.

    # Initial detection performed on the entire image.
    transposed, padhw, shavedim, resized = preprocess(img, shave=False)
    yolo_output = model.predict(np.array([transposed]))[0]
    boxes = process_output(yolo_output, threshold=0.20, padhw=padhw,
                           shaved=False, shavedim=shavedim)

    # Perform detection around each box stored from the previous frame.
    for box in self.boxes:
        offset = 150
        shavedim = [box.y1 - offset, box.y2 + offset,
                    box.x1 - offset, box.x2 + offset]
        transposed, padhw, shavedim, resized = preprocess(
            img, shave=True, shavedim=shavedim)
        yolo_output = model.predict(np.array([transposed]))[0]
        boxes2 = process_output(yolo_output, threshold=0.35, padhw=padhw,
                                shaved=True, shavedim=shavedim)
        boxes += boxes2

    # Last detection performed on the picture shaved to just the highway ahead.
    transposed, padhw, shavedim, resized = preprocess(
        img, shave=True, shavedim=(350, 500, 500, 1000))
    yolo_output = model.predict(np.array([transposed]))[0]
    boxes2 = process_output(yolo_output, threshold=0.30, padhw=padhw,
                            shaved=True, shavedim=shavedim)
    boxes += boxes2

    # Remove duplicate boxes and apply the detection-count threshold.
    boxes = remove_duplicates(boxes, img)
    boxes = self.apply_threshold(boxes)

    # If detections dropped compared with the previous frame, keep the old boxes
    # for up to three consecutive frames before accepting the drop.
    if (len(boxes) < len(self.boxes)) and self.dropped < 3:
        self.dropped += 1
        boxes = self.boxes
    else:
        self.dropped = 0

    drawn = draw_boxes(boxes, img)
    self.boxes = boxes
    return drawn

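# A minimal driver sketch (not from the original snippet): applying the pipeline
# frame by frame to a video with OpenCV. The video paths are placeholders, and it
# assumes pipeline() is a method of the Pipeline class defined further below and
# that cv2 is imported as in the surrounding snippets.
def run_pipeline_on_video(in_path='project_video.mp4',
                          out_path='project_video_out.mp4'):
    pipe = Pipeline()
    cap = cv2.VideoCapture(in_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*'mp4v'),
                             fps, (width, height))
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # the pipeline expects RGB
        drawn = pipe.pipeline(rgb)
        writer.write(cv2.cvtColor(drawn, cv2.COLOR_RGB2BGR))
    cap.release()
    writer.release()
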
def fit(self, X, y, optimizer=DCDM, C=10):
    if isinstance(optimizer, str):
        # Build the dual soft-margin SVM QP: minimize (1/2) a^T P a + q^T a
        # subject to G a <= h (0 <= a_i <= C) and A a = b (y^T a = 0).
        eps = 1e-5
        m, n = X.shape
        y = y.reshape(-1, 1) * 1.
        X_dash = y * X
        P = np.dot(X_dash, X_dash.T) * 1.
        P += np.eye(*P.shape) * eps  # regularize so P is positive definite
        q = -np.ones((m, 1)).reshape((m,))
        G = np.vstack((-np.eye(m), np.eye(m)))
        G += np.eye(*G.shape) * eps
        h = np.hstack((np.zeros(m), np.ones(m) * C)).reshape((2 * m,))
        A = y.reshape(1, -1)
        b = np.zeros(1)
        gc.collect()
        start = time()
        with Capturing() as output:
            if optimizer.startswith('cvxpy_'):
                alphas = qpsolvers.cvxpy_solve_qp(
                    P, q, G, h, A, b, solver=optimizer[6:], verbose=True)
            else:
                alphas = qpsolvers.solve_qp(
                    P, q, G, h, A, b, solver=optimizer, verbose=True)
        runtime1 = time() - start
        result, result_df, runtime2 = process_output(output, optimizer)
        if result:
            # Recover the primal weights from the dual variables.
            w = np.matmul(alphas, X_dash)
            self.w = w
        else:
            print("Solve process failed.")
        return result, result_df, runtime2 or runtime1
    elif issubclass(optimizer, Optimizer):
        eps = 1e-5
        m, n = X.shape
        X_dash = y.reshape(-1, 1) * X
        P = np.dot(X_dash, X_dash.T) * 1.
        P += np.eye(*P.shape) * eps
        q = -np.ones((m, 1)).reshape((m,))
        opt = optimizer()
        start = time()
        with Capturing() as output:
            opt.optimize(self, X, y, P, q)
        runtime1 = time() - start
        result, result_df, runtime2 = process_output(output, optimizer)
        return result, result_df, runtime2 or runtime1
    else:
        raise ValueError(f'Optimizer {optimizer} is not recognized')

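# A hedged usage sketch for fit(): the class name SVM and the 'quadprog' backend
# are assumptions (any QP backend installed for qpsolvers should work). Labels
# are expected in {-1, +1}.
X = np.vstack([np.random.randn(20, 2) + 2.0,
               np.random.randn(20, 2) - 2.0])
y = np.hstack([np.ones(20), -np.ones(20)])
svm = SVM()
result, result_df, runtime = svm.fit(X, y, optimizer='quadprog', C=10)
if result:
    print("learned weights:", svm.w)
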
def decode():
    with tf.Session() as sess:
        # Load vocabularies.
        vocab_file = FLAGS.data_dir + "/vocab.pkl"
        word2id = pkl.load(open(vocab_file, "rb"))
        id2word = {v: k for (k, v) in word2id.items()}
        embeddings = embedding.Embedding(None, word2id, id2word,
                                         word2id["UNK"], word2id["PAD"],
                                         word2id["</s>"], word2id["<s>"])

        # Create model and load parameters.
        FLAGS.batch_size = 1  # We decode one sentence at a time.
        model = create_model(sess, True, len(word2id))

        # Decode from standard input.
        sys.stdout.write("> ")
        sys.stdout.flush()
        sentence = sys.stdin.readline()
        while sentence:
            encoder_inputs, decoder_inputs, target_weights, bucket_id = utils.prepare_input_sent(
                sentence, embeddings, _buckets)
            # Get output logits for the sentence.
            _, _, output_logits = model.step(
                sess, np.array([encoder_inputs]).transpose(),
                np.array([decoder_inputs]).transpose(),
                np.array([target_weights]).transpose(), bucket_id, True)
            print(utils.process_output(output_logits, embeddings))
            print("> ", end="")
            sys.stdout.flush()
            sentence = sys.stdin.readline()

def test_wav_file(self, file_name, save_path):
    """
    Extract multi-pitch from a wav file and save the result.
    """
    sess = tf.Session()
    self.load_model(sess, log_dir=config.log_dir)
    in_batches_hcqt, nchunks_in, max_len = self.read_input_wav_file(file_name)
    out_batches_atb = []
    for in_batch_hcqt in in_batches_hcqt:
        feed_dict = {self.input_placeholder: in_batch_hcqt,
                     self.is_train: False}
        out_atb = sess.run(self.outputs, feed_dict=feed_dict)
        out_batches_atb.append(out_atb)
    out_batches_atb = np.array(out_batches_atb)
    out_batches_atb = utils.overlapadd(
        out_batches_atb.reshape(out_batches_atb.shape[0], config.batch_size,
                                config.max_phr_len, -1), nchunks_in)
    out_batches_atb = out_batches_atb[:max_len]
    time_1, ori_freq = utils.process_output(out_batches_atb)
    utils.save_multif0_output(time_1, ori_freq, save_path)

def query_model(model, dataset="dataset1"):
    x, _ = load_data(subset=dataset, should_resize=True)
    y_pred = model.predict(x, batch_size=1)
    y_pred = process_output(y_pred, width=y_pred.shape[2],
                            height=y_pred.shape[1])
    for i in range(len(x)):
        imsave(f"./data/outputs/{i+1}_rgb.png",
               np.uint8(np.round(x[i] * 255)))
        imsave(f"./data/outputs/{i+1}_mask.png", y_pred[i])

def extract_f0_file(self, file_name, sess):
    in_batches_hcqt, atb, nchunks_in = self.read_input_file(file_name)
    out_batches_atb = []
    for in_batch_hcqt in in_batches_hcqt:
        feed_dict = {self.input_placeholder: in_batch_hcqt,
                     self.is_train: False}
        out_atb = sess.run(self.outputs, feed_dict=feed_dict)
        out_batches_atb.append(out_atb)
    out_batches_atb = np.array(out_batches_atb)
    out_batches_atb = utils.overlapadd(
        out_batches_atb.reshape(out_batches_atb.shape[0], config.batch_size,
                                config.max_phr_len, -1), nchunks_in)
    out_batches_atb = out_batches_atb[:atb.shape[0]]
    time_1, ori_freq = utils.process_output(atb)
    time_2, est_freq = utils.process_output(out_batches_atb)
    scores = mir_eval.multipitch.evaluate(time_1, ori_freq, time_2, est_freq)
    return scores

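# A small aggregation sketch (not part of the original class): mir_eval's
# multipitch.evaluate returns a dict of metrics (e.g. 'Precision', 'Recall',
# 'Accuracy'), so the per-file scores returned above can be averaged like this.
def average_scores(score_dicts):
    keys = score_dicts[0].keys()
    return {k: float(np.mean([s[k] for s in score_dicts])) for k in keys}
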
def test_process_output(self):
    # 3 1d_bins: (0, 85, 170, 255)
    n_1d_bins = 3
    original_shape = (2, 2)
    # Note the (width, height, 1) shape instead of (width, height).
    luminance = np.array([[[-75], [-94]], [[-75], [26]]], dtype=int)
    binned_ab_channels = np.array([[4, 3], [4, 4]], dtype=int)
    expected_output = np.array([[[48, 50, 51], [0, 49, 163]],
                                [[48, 50, 51], [143, 146, 147]]],
                               dtype=np.uint8)
    output = process_output(luminance, binned_ab_channels, original_shape,
                            n_1d_bins)
    np.testing.assert_equal(output, expected_output)

def test_wav_folder(self, folder_name, save_path):
    """
    Extract multi-pitch from the wav files in a folder and save the results.
    """
    songs = next(os.walk(folder_name))[1]
    sess = tf.Session()
    self.load_model(sess, log_dir=config.log_dir)
    for song in songs:
        count = 0
        print("Processing song %s" % song)
        file_list = [x for x in os.listdir(os.path.join(folder_name, song))
                     if x.endswith('.wav') and not x.startswith('.')]
        for file_name in file_list:
            in_batches_hcqt, nchunks_in, max_len = self.read_input_wav_file(
                os.path.join(folder_name, song, file_name))
            out_batches_atb = []
            for in_batch_hcqt in in_batches_hcqt:
                feed_dict = {self.input_placeholder: in_batch_hcqt,
                             self.is_train: False}
                out_atb = sess.run(self.outputs, feed_dict=feed_dict)
                out_batches_atb.append(out_atb)
            out_batches_atb = np.array(out_batches_atb)
            out_batches_atb = utils.overlapadd(
                out_batches_atb.reshape(out_batches_atb.shape[0],
                                        config.batch_size,
                                        config.max_phr_len, -1), nchunks_in)
            out_batches_atb = out_batches_atb[:max_len]
            time_1, ori_freq = utils.process_output(out_batches_atb)
            utils.save_multif0_output(
                time_1, ori_freq,
                os.path.join(save_path, song, file_name[:-4] + '.csv'))
            count += 1
            utils.progress(count, len(file_list), suffix='evaluation done')

def next_():
    last = request.args.get('lastid')
    premise = request.args.get('premise')
    if last and premise:
        return {"error": "You shouldn't send both ``lastid`` and ``premise``"}
    elif not any([last, premise]):
        return {"error": "You must send something"}
    elif last:
        last = np.array([[int(last)]])
    elif premise:
        con.tensorset('context', context)
        last = init_conversation(premise)
    wordids = []
    con.tensorset('last', last)
    con.modelrun('gptmodel', inputs=['last', 'context'],
                 outputs=['out', 'context'])
    out = con.tensorget('out')
    last = process_output(out)
    wordids.append(last.item())
    words = ids2text(wordids)
    return {"next": words, "nextid": wordids[-1]}

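# A hedged client-side sketch, assuming next_() is registered as a Flask route at
# '/next' on localhost:5000 (neither the route decorator nor the host appears in
# the snippet above). Start a conversation with a premise, then keep feeding back
# 'nextid' to continue it one token at a time.
import requests

resp = requests.get('http://localhost:5000/next',
                    params={'premise': 'The weather today'}).json()
print(resp['next'])
for _ in range(10):
    resp = requests.get('http://localhost:5000/next',
                        params={'lastid': resp['nextid']}).json()
    print(resp['next'], end=' ')
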
# Notebook-style cells: the bare expressions below (test_image.shape, padhw,
# len(boxes)) just display intermediate values.
model.summary()
load_weights(model, './yolo-tiny.weights')

test_image = cv2.imread('test_images/test3.jpg')
test_image = cv2.cvtColor(test_image, cv2.COLOR_BGR2RGB)
plt.imshow(test_image[200:600, 400:1200])
test_image.shape

processed, padhw, shavedim, resized = preprocess(test_image, shave=True)
plt.imshow(resized)
padhw

prediction = model.predict(np.array([processed]))[0]
boxes = process_output(prediction, padhw=padhw, shaved=True)
len(boxes)

boxes = remove_duplicates(boxes, test_image)
img = draw_boxes(boxes, test_image)
plt.figure(figsize=[10, 10])
plt.imshow(img)


class Pipeline:
    def __init__(self):
        self.boxes = []
        self.dropped = 0
        self.history = deque(maxlen=8)
        self.first_frames = True

def infer_on_stream(args, client):
    """
    Initialize the inference network, stream video to the network, and output
    stats and video.

    :param args: Command line arguments parsed by `build_argparser()`
    :param client: MQTT client
    :return: None
    """
    mqtt_client = connect_mqtt()
    infer_network = Network(args.model)
    infer_network.load_model(args.device, args.cpu_extension)
    n, c, h, w = infer_network.get_input_shape()

    input_validated, single_image_mode = utils.validate_input(args.input)
    cap = cv2.VideoCapture(input_validated)
    if not cap.isOpened():
        exit("Error: couldn't open input file")
    video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    input_width = int(cap.get(3))
    input_height = int(cap.get(4))
    frame_rate = cap.get(cv2.CAP_PROP_FPS)
    frame_count = 0

    # Stats variables.
    current_people_before = 0
    current_people_now = 0
    current_people_buffer = 0
    total_people_count = 0
    time_in_frame = 0.0   # time the currently detected person has stayed so far [sec]
    total_times = [0.0]   # total time in frame for every person detected so far
    average_time = 0.0    # average time in frame over all people detected so far
    new_person_detected = False

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        start_time = time.time()
        processed_frame = utils.process_input(frame, h, w)
        input_dict = infer_network.get_inputs(processed_frame, h, w, SCALE)
        request_handle = infer_network.exec_inference(input_dict, 0)
        infer_network.wait(request_handle)
        output = infer_network.get_output(request_handle)
        boxes = utils.process_output(output, args.prob_threshold,
                                     input_width, input_height)
        inference_time = int((time.time() - start_time) * 1000.0)
        frame_count = frame_count + 1
        current_people_now = len(boxes)

        if not single_image_mode:
            # Working with video: debounce the detection count over FILTER_COUNT
            # frames before accepting a change.
            if current_people_now != current_people_before:
                current_people_buffer = current_people_buffer + 1
                new_person_detected = False
                if current_people_buffer == FILTER_COUNT:
                    current_people_before = current_people_now
                    current_people_buffer = 0
                    if current_people_now != 0:
                        # A new person was detected.
                        total_people_count = total_people_count + 1
                        mqtt_client.publish("person", json.dumps({"count": current_people_before}))
                        # mqtt_client.publish("person", json.dumps({"total": total_people_count}))
                        # removed because the UI calculates it
                        new_person_detected = True
                    else:
                        # No detections on the frame anymore; store the time the
                        # person was in frame.
                        total_times.append(time_in_frame)
                        mqtt_client.publish("person/duration", json.dumps({"duration": time_in_frame}))
                        mqtt_client.publish("person", json.dumps({"count": 0}))
                        average_time = sum(total_times) / total_people_count
                        time_in_frame = 0
            if new_person_detected:
                time_in_frame = time_in_frame + 1 / frame_rate
            utils.draw_results(frame, boxes, current_people_before,
                               total_people_count, time_in_frame,
                               average_time, inference_time)
            sys.stdout.buffer.write(frame)
            sys.stdout.flush()
        else:
            # Working with a single image.
            utils.draw_results(frame, boxes, current_people_now, -1, -1, -1,
                               inference_time)
            cv2.imwrite('output.jpg', frame)

    cap.release()
    client.loop_stop()
    client.disconnect()

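# A hedged sketch of the kind of post-processing utils.process_output likely does
# here, assuming an SSD-style detector whose raw output has shape [1, 1, N, 7]
# with rows [image_id, label, confidence, xmin, ymin, xmax, ymax] in normalized
# coordinates. This is an illustration, not the project's actual helper.
def process_output_sketch(output, prob_threshold, input_width, input_height):
    boxes = []
    for detection in output[0][0]:
        confidence = float(detection[2])
        if confidence > prob_threshold:
            xmin = int(detection[3] * input_width)
            ymin = int(detection[4] * input_height)
            xmax = int(detection[5] * input_width)
            ymax = int(detection[6] * input_height)
            boxes.append((xmin, ymin, xmax, ymax, confidence))
    return boxes
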