def validation(model, criterion, valid_loader):
    """Evaluate ``model`` on ``valid_loader`` and report mean loss and F2.

    The model is switched to eval mode, every batch is pushed through it,
    and the per-batch loss and F2 score are collected.  Predictions are the
    sigmoid of the model outputs thresholded at 0.2.

    Args:
        model: callable network exposing ``eval()``.
        criterion: loss callable invoked as ``criterion(outputs, targets)``.
        valid_loader: iterable yielding ``(inputs, targets)`` batches.

    Returns:
        dict with the keys ``'valid_loss'`` and ``'valid_f2'`` holding the
        means over all batches (the same values are printed).
    """
    model.eval()
    batch_losses, batch_f2 = [], []
    for inputs, targets in valid_loader:
        inputs = utils.variable(inputs, volatile=True)
        targets = utils.variable(targets)
        outputs = model(inputs)
        batch_losses.append(criterion(outputs, targets).data[0])
        expected = targets.data.cpu().numpy()
        predicted = F.sigmoid(outputs).data.cpu().numpy() > 0.2
        batch_f2.append(f2_score(y_true=expected, y_pred=predicted))
    valid_loss = np.mean(batch_losses)  # type: float
    valid_f2 = np.mean(batch_f2)  # type: float
    print('Valid loss: {:.4f}, F2: {:.4f}'.format(valid_loss, valid_f2))
    return {'valid_loss': valid_loss, 'valid_f2': valid_f2}
def validation(model, criterion, valid_loader):
    """Evaluate ``model`` on ``valid_loader`` and report mean loss and F2.

    The model is switched to eval mode, every batch is pushed through it,
    and the per-batch loss and F2 score are collected.  Predictions are the
    sigmoid of the model outputs thresholded at 0.2.

    Args:
        model: callable network exposing ``eval()``.
        criterion: loss callable invoked as ``criterion(outputs, targets)``.
        valid_loader: iterable yielding ``(inputs, targets)`` batches.

    Returns:
        dict with the keys ``'valid_loss'`` and ``'valid_f2'`` holding the
        means over all batches (the same values are printed).
    """
    model.eval()
    batch_losses, batch_f2 = [], []
    for inputs, targets in valid_loader:
        inputs = utils.variable(inputs, volatile=True)
        targets = utils.variable(targets)
        outputs = model(inputs)
        batch_losses.append(criterion(outputs, targets).data[0])
        expected = targets.data.cpu().numpy()
        predicted = F.sigmoid(outputs).data.cpu().numpy() > 0.2
        batch_f2.append(f2_score(y_true=expected, y_pred=predicted))
    valid_loss = np.mean(batch_losses)  # type: float
    valid_f2 = np.mean(batch_f2)  # type: float
    print('Valid loss: {:.4f}, F2: {:.4f}'.format(valid_loss, valid_f2))
    return {'valid_loss': valid_loss, 'valid_f2': valid_f2}
def link_key_point(img_canvas, candidate, subset, stickwidth=4):
    """Draw every complete limb as a blended, filled ellipse.

    For each of the first 17 entries of ``limb_seq`` and each row of
    ``subset`` the two keypoint ids of the limb are looked up; a limb with a
    missing endpoint (id ``-1``) is skipped.  Each drawn limb is painted on a
    copy of the canvas and blended back with weights 0.4 (old) / 0.6 (new).

    Args:
        img_canvas: image to start from; drawing happens on copies.
        candidate: 2-D array of keypoints; columns 0 and 1 are used as the
            ellipse centre coordinates, in that order.
        subset: sequence of per-person rows holding keypoint ids.
        stickwidth: half-thickness of the limb ellipse.

    Returns:
        The canvas after all limbs were blended in (``img_canvas`` itself
        when nothing was drawn).
    """
    for limb_no in range(17):
        for person in subset:
            joint_ids = person[np.array(limb_seq[limb_no]) - 1]
            if -1 in joint_ids:
                continue

            overlay = img_canvas.copy()
            rows = joint_ids.astype(int)
            first = candidate[rows, 0]
            second = candidate[rows, 1]
            d_first = first[0] - first[1]
            d_second = second[0] - second[1]

            length = (d_second ** 2 + d_first ** 2) ** 0.5
            angle = math.degrees(math.atan2(d_second, d_first))
            centre = (int(np.mean(first)), int(np.mean(second)))
            axes = (int(length / 2), stickwidth)

            polygon = cv2.ellipse2Poly(centre, axes, int(angle), 0, 360, 1)
            cv2.fillConvexPoly(overlay, polygon, colors[limb_no])
            img_canvas = cv2.addWeighted(img_canvas, 0.4, overlay, 0.6, 0)
    return img_canvas
def link_key_point(img_canvas, candidate, subset, stickwidth=4):
    """Draw every complete limb as a blended, filled ellipse.

    For each of the first 17 entries of ``limb_seq`` and each row of
    ``subset`` the two keypoint ids of the limb are looked up; a limb with a
    missing endpoint (id ``-1``) is skipped.  Each drawn limb is painted on a
    copy of the canvas and blended back with weights 0.4 (old) / 0.6 (new).

    Args:
        img_canvas: image to start from; drawing happens on copies.
        candidate: 2-D array of keypoints; columns 0 and 1 are used as the
            ellipse centre coordinates, in that order.
        subset: sequence of per-person rows holding keypoint ids.
        stickwidth: half-thickness of the limb ellipse.

    Returns:
        The canvas after all limbs were blended in (``img_canvas`` itself
        when nothing was drawn).
    """
    for limb_no in range(17):
        for person in subset:
            joint_ids = person[np.array(limb_seq[limb_no]) - 1]
            if -1 in joint_ids:
                continue

            overlay = img_canvas.copy()
            rows = joint_ids.astype(int)
            first = candidate[rows, 0]
            second = candidate[rows, 1]
            d_first = first[0] - first[1]
            d_second = second[0] - second[1]

            length = (d_second ** 2 + d_first ** 2) ** 0.5
            angle = math.degrees(math.atan2(d_second, d_first))
            centre = (int(np.mean(first)), int(np.mean(second)))
            axes = (int(length / 2), stickwidth)

            polygon = cv2.ellipse2Poly(centre, axes, int(angle), 0, 360, 1)
            cv2.fillConvexPoly(overlay, polygon, colors[limb_no])
            img_canvas = cv2.addWeighted(img_canvas, 0.4, overlay, 0.6, 0)
    return img_canvas
cooldown=2, verbose=1, min_lr=1e-5 * lr, factor=adaptive_lr_factor) for epoch in range(1, num_epochs + 1): print("Begin epoch {}/{}".format(epoch, num_epochs)) epoch_losses, epoch_f2 = train_epoch( train_loader, model, loss_fn, optimizer, dtype, sigmoid_threshold=sigmoid_threshold, print_every=20) scheduler.step(np.mean(epoch_losses), epoch) ## f2 score for validation dataset f2_acc = validate_epoch(model, val_loader, dtype, sigmoid_threshold=sigmoid_threshold) ## store results train_acc_history += epoch_f2 val_acc_history.append(f2_acc) loss_history += epoch_losses ## overwrite the model .pkl file every epoch torch.save(model.state_dict(), save_model_path) save_accuracy_and_loss_mat(save_mat_path, train_acc_history, val_acc_history, loss_history,
# Draw the detected keypoints and limbs onto the test image, report the
# elapsed time since ``tic`` and write the result to ``result.png``.
canvas = cv2.imread(test_image)  # B,G,R order

# One filled dot per detected peak, coloured by part index.
for i in range(18):
    for j in range(len(all_peaks[i])):
        cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1)

stickwidth = 4

# One blended ellipse per limb of every assembled person.
for i in range(17):
    for n in range(len(subset)):
        index = subset[n][np.array(limbSeq[i]) - 1]
        if -1 in index:
            # At least one endpoint of this limb was not detected.
            continue
        cur_canvas = canvas.copy()
        Y = candidate[index.astype(int), 0]
        X = candidate[index.astype(int), 1]
        mX = np.mean(X)
        mY = np.mean(Y)
        length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
        polygon = cv2.ellipse2Poly((int(mY), int(mX)),
                                   (int(length / 2), stickwidth),
                                   int(angle), 0, 360, 1)
        cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
        canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

#Parallel(n_jobs=1)(delayed(handle_one)(i) for i in range(18))

toc = time.time()
# Single parenthesised argument: prints the same text on Python 2 and 3.
print('time is %.5f' % (toc - tic))

cv2.imwrite('result.png', canvas)
def post_processing(self, humans):
    """Tag every human with coarse pose labels derived from its joints.

    For each element of ``humans`` the flags ``isLWaving`` / ``isRWaving`` /
    ``isWaving``, ``isSitting``, ``isLying``, ``isLPointing`` /
    ``isRPointing`` are set to 1 when the corresponding test passes, and the
    matching label is appended to ``h.tags``.  A one-line summary per human
    is printed.

    Args:
        humans: iterable of objects exposing ``joints``, ``tags``,
            ``cropped_cloud``, ``object_id``, ``person_name`` and the flag
            attributes listed above.

    Returns:
        The same ``humans`` object, after in-place tagging.
    """
    # Layout of ``h.joints`` (two consecutive entries per joint):
    #    0  1 nose        2  3 neck
    #    4  5 r_shoulder  6  7 r_elbow    8  9 r_wrist
    #   10 11 l_shoulder 12 13 l_elbow   14 15 l_wrist
    #   16 17 r_pelvis   18 19 r_knee    20 21 r_ankle
    #   22 23 l_pelvis   24 25 l_knee    26 27 l_ankle
    #   28 29 r_eye      30 31 l_eye     32 33 r_ear    34 35 l_ear
    point_length = 45

    for h in humans:
        # raising hand
        if (h.joints[10] >= 0 and h.joints[12] >= 0
                and h.joints[10] > h.joints[12]):
            h.isLWaving = 1
            h.tags.append('lwaving')
        if (h.joints[4] >= 0 and h.joints[6] >= 0
                and h.joints[4] > h.joints[6]):
            h.isRWaving = 1
            h.tags.append('rwaving')
        if h.isLWaving == 1 or h.isRWaving == 1:
            h.isWaving = 1
            h.tags.append('waving')

        # sitting: highest valid shoulder (third component returned by
        # get_pos_wrt_robot) below the configured threshold.
        shoulder_h = -9999  # sentinel: no shoulder measured
        cropped_cloud = self.cvbridge.imgmsg_to_cv2(
            h.cropped_cloud, desired_encoding="passthrough")
        for a, b in ((4, 5), (10, 11)):
            if h.joints[a] > 0 and h.joints[b] > 0:
                shoulder_h = max(
                    shoulder_h,
                    self.get_pos_wrt_robot(cropped_cloud, h.joints[a],
                                           h.joints[b])[2])
        if shoulder_h < self.params['sitting_thr'] and shoulder_h > -9999:
            h.isSitting = 1
            h.tags.append('sitting')

        # lying and standing
        knee = np.mean([h.joints[18], h.joints[24]])
        if h.joints[2] >= 0 and knee >= 0 and abs(h.joints[2] - knee) <= 20:
            h.isLying = 1
            h.tags.append('lying')

        # pointing
        if (h.joints[9] >= 0 and h.joints[5] >= 0
                and abs(h.joints[5] - h.joints[9]) > point_length):
            if h.joints[5] - h.joints[9] > 0:
                h.isRPointing = 1
                h.tags.append('rpointing')
            else:
                h.isLPointing = 1
                h.tags.append('lpointing')
        if (h.joints[11] >= 0 and h.joints[15] >= 0
                and abs(h.joints[15] - h.joints[11]) > point_length):
            if h.joints[15] - h.joints[11] > 0:
                h.isLPointing = 1
                h.tags.append('lpointing')
            else:
                h.isRPointing = 1
                h.tags.append('rpointing')

        # Space-separated summary; this form prints the same text on
        # Python 2 and Python 3.
        print('%s %s %s %s %s' % (h.object_id, h.person_name, h.isWaving,
                                  h.isSitting, shoulder_h))
    return humans
#### for batter first videos = [] # Not found until there is a frame with a person detected while not found and p < 30: #len(df[player][i])==0: ret, frame = video_capture.read() out = handle_one( frame[top_b:bottom_b, left_b:right_b]) # changed batter first move for person in range(len(out)): hips = np.asarray(out[person])[indices] hips = hips[np.sum(hips, axis=1) != 0] if len(hips) == 0: continue mean_hips = np.mean(hips, axis=0) norm = abs(mean_hips[0] - center[0]) + abs( mean_hips[1] - center[1]) #6 hip if norm < old_norm: found = True loc = person old_norm = norm p += 1 if not found: pitcher_array.append([[0, 0] for j in range(18)]) print("no person detected in frame", p) if not found: print("no person detected in first 30 frames") continue #left_b = 0 # change for batters first move
def handle_one(oriImg):
    """Estimate poses in one image and return a copy with them drawn.

    The image is rescaled by ``model_['boxsize'] / height``, padded with
    ``util.padRightDownCorner`` and pushed through the module-level
    ``model`` on the GPU.  Both network outputs are upsampled back to the
    input resolution; the second one is used as per-part heatmaps and the
    first one as part-affinity fields (PAFs).  Heatmap peaks of the 18 parts
    become keypoint candidates, candidate pairs along ``limbSeq`` are scored
    with the PAFs, accepted connections are merged into one row per person,
    weak rows are dropped, and keypoints and limbs are drawn.

    Args:
        oriImg: image array indexed as (rows, cols, channels); presumably a
            BGR image as loaded by OpenCV -- TODO confirm with callers.

    Returns:
        A copy of ``oriImg`` with keypoints and limbs drawn on it.
    """
    # for visualize
    canvas = np.copy(oriImg)
    print(oriImg.shape)
    scale = model_['boxsize'] / float(oriImg.shape[0])
    print(scale)

    imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale,
                             interpolation=cv2.INTER_CUBIC)
    imageToTest_padded, _ = util.padRightDownCorner(
        imageToTest, model_['stride'], model_['padValue'])
    # (rows, cols, channels) -> (1, channels, rows, cols), scaled to about
    # [-0.5, 0.5).
    imageToTest_padded = np.transpose(
        np.float32(imageToTest_padded[:, :, :, np.newaxis]),
        (3, 2, 0, 1)) / 256 - 0.5

    feed = Variable(T.from_numpy(imageToTest_padded)).cuda()
    output1, output2 = model(feed)

    out_size = (oriImg.shape[0], oriImg.shape[1])
    heatmap = nn.UpsamplingBilinear2d(out_size).cuda()(output2)
    paf = nn.UpsamplingBilinear2d(out_size).cuda()(output1)
    print(heatmap.size())
    print(paf.size())
    print(type(heatmap))

    # Channels-first tensors -> channels-last numpy arrays.
    heatmap_avg = T.transpose(T.transpose(heatmap[0], 0, 1), 1,
                              2).data.cpu().numpy()
    paf_avg = T.transpose(T.transpose(paf[0], 0, 1), 1, 2).data.cpu().numpy()

    # ---- keypoint candidates: local maxima of each smoothed heatmap ------
    all_peaks = []
    peak_counter = 0
    for part in range(18):
        map_ori = heatmap_avg[:, :, part]
        smoothed = gaussian_filter(map_ori, sigma=3)

        map_left = np.zeros(smoothed.shape)
        map_left[1:, :] = smoothed[:-1, :]
        map_right = np.zeros(smoothed.shape)
        map_right[:-1, :] = smoothed[1:, :]
        map_up = np.zeros(smoothed.shape)
        map_up[:, 1:] = smoothed[:, :-1]
        map_down = np.zeros(smoothed.shape)
        map_down[:, :-1] = smoothed[:, 1:]

        peaks_binary = np.logical_and.reduce(
            (smoothed >= map_left, smoothed >= map_right,
             smoothed >= map_up, smoothed >= map_down,
             smoothed > param_['thre1']))
        rows, cols = np.nonzero(peaks_binary)
        # note reverse: peaks are stored as (column, row).  ``list`` keeps
        # ``len`` working when ``zip`` returns an iterator (Python 3).
        peaks = list(zip(cols, rows))
        peaks_with_score = [
            peak + (map_ori[peak[1], peak[0]], ) for peak in peaks
        ]
        # Append a running id that is unique across all parts.
        peaks_with_score_and_id = [
            peak + (peak_counter + offset, )
            for offset, peak in enumerate(peaks_with_score)
        ]

        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)

    # ---- score candidate limbs with the PAFs -----------------------------
    connection_all = []
    special_k = []
    mid_num = 10

    for k in range(len(mapIdx)):
        score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
        candA = all_peaks[limbSeq[k][0] - 1]
        candB = all_peaks[limbSeq[k][1] - 1]
        nA = len(candA)
        nB = len(candB)
        if nA != 0 and nB != 0:
            connection_candidate = []
            for i in range(nA):
                for j in range(nB):
                    vec = np.subtract(candB[j][:2], candA[i][:2])
                    norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                    if norm == 0:
                        # Coincident candidates: no direction to score and
                        # the distance prior below would divide by zero.
                        continue
                    vec = np.divide(vec, norm)

                    # mid_num sample points on the segment A -> B.
                    startend = list(
                        zip(np.linspace(candA[i][0], candB[j][0],
                                        num=mid_num),
                            np.linspace(candA[i][1], candB[j][1],
                                        num=mid_num)))

                    vec_x = np.array([
                        score_mid[int(round(pt[1])), int(round(pt[0])), 0]
                        for pt in startend
                    ])
                    vec_y = np.array([
                        score_mid[int(round(pt[1])), int(round(pt[0])), 1]
                        for pt in startend
                    ])

                    # PAF projected onto the unit vector A -> B.
                    score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(
                        vec_y, vec[1])
                    # Mean score, penalised when the limb is longer than
                    # half the image height.
                    score_with_dist_prior = sum(score_midpts) / len(
                        score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                    criterion1 = len(
                        np.nonzero(score_midpts > param_['thre2'])
                        [0]) > 0.8 * len(score_midpts)
                    criterion2 = score_with_dist_prior > 0
                    if criterion1 and criterion2:
                        connection_candidate.append([
                            i, j, score_with_dist_prior,
                            score_with_dist_prior + candA[i][2] + candB[j][2]
                        ])

            # Greedy matching: best score first, each endpoint used once.
            connection_candidate = sorted(connection_candidate,
                                          key=lambda x: x[2],
                                          reverse=True)
            connection = np.zeros((0, 5))
            for c in range(len(connection_candidate)):
                i, j, s = connection_candidate[c][0:3]
                if i not in connection[:, 3] and j not in connection[:, 4]:
                    connection = np.vstack(
                        [connection, [candA[i][3], candB[j][3], s, i, j]])
                    if len(connection) >= min(nA, nB):
                        break

            connection_all.append(connection)
        else:
            special_k.append(k)
            connection_all.append([])

    # ---- assemble connections into people --------------------------------
    # last number in each row is the total parts number of that person
    # the second last number in each row is the score of the overall
    # configuration
    subset = -1 * np.ones((0, 20))
    candidate = np.array([item for sublist in all_peaks for item in sublist])

    for k in range(len(mapIdx)):
        if k in special_k:
            continue
        partAs = connection_all[k][:, 0]
        partBs = connection_all[k][:, 1]
        indexA, indexB = np.array(limbSeq[k]) - 1

        for i in range(len(connection_all[k])):
            found = 0
            subset_idx = [-1, -1]
            for j in range(len(subset)):
                if subset[j][indexA] == partAs[i] or subset[j][
                        indexB] == partBs[i]:
                    subset_idx[found] = j
                    found += 1

            if found == 1:
                j = subset_idx[0]
                if subset[j][indexB] != partBs[i]:
                    subset[j][indexB] = partBs[i]
                    subset[j][-1] += 1
                    subset[j][-2] += candidate[partBs[i].astype(int),
                                               2] + connection_all[k][i][2]
            elif found == 2:  # if found 2 and disjoint, merge them
                j1, j2 = subset_idx
                print("found = 2")
                membership = ((subset[j1] >= 0).astype(int) +
                              (subset[j2] >= 0).astype(int))[:-2]
                if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                    subset[j1][:-2] += (subset[j2][:-2] + 1)
                    subset[j1][-2:] += subset[j2][-2:]
                    subset[j1][-2] += connection_all[k][i][2]
                    subset = np.delete(subset, j2, 0)
                else:  # as like found == 1
                    subset[j1][indexB] = partBs[i]
                    subset[j1][-1] += 1
                    subset[j1][-2] += candidate[partBs[i].astype(int),
                                                2] + connection_all[k][i][2]

            # if find no partA in the subset, create a new subset
            elif not found and k < 17:
                row = -1 * np.ones(20)
                row[indexA] = partAs[i]
                row[indexB] = partBs[i]
                row[-1] = 2
                row[-2] = sum(
                    candidate[connection_all[k][i, :2].astype(int),
                              2]) + connection_all[k][i][2]
                subset = np.vstack([subset, row])

    # delete some rows of subset which has few parts occur
    deleteIdx = [
        i for i in range(len(subset))
        if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4
    ]
    subset = np.delete(subset, deleteIdx, axis=0)

    # ---- drawing ----------------------------------------------------------
    for i in range(18):
        for j in range(len(all_peaks[i])):
            cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i],
                       thickness=-1)

    stickwidth = 4

    for i in range(17):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i]) - 1]
            if -1 in index:
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly(
                (int(mY), int(mX)), (int(length / 2), stickwidth),
                int(angle), 0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

    return canvas
def main(args):
    """Train the CNN encoder / RNN decoder captioning model, or sample.

    Builds the COCO train and validation loaders, constructs the encoder and
    decoder, optionally restores them (and the optimizer and loss history)
    from ``args.checkpoint_file``, and then either returns
    ``utils.sample(...)`` when ``args.sample`` is set or runs the training
    loop.  Models and losses are saved every ``save_step`` steps and once
    more when the loop ends, including on ``KeyboardInterrupt``.

    Args:
        args: parsed options; the attributes read here are ``batch_size``,
            ``embed_size``, ``num_hidden``, ``log_step``, ``checkpoint_dir``,
            ``rec_unit``, ``checkpoint_file`` and ``sample``.

    Returns:
        The result of ``utils.sample`` in sample mode, otherwise ``None``.
    """
    # hyperparameters
    batch_size = args.batch_size
    num_workers = 1

    # Image Preprocessing
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
    ])

    # load COCOs dataset
    IMAGES_PATH = 'data/train2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_train2014.json'

    vocab = load_vocab()
    train_loader = get_coco_data_loader(path=IMAGES_PATH,
                                        json=CAPTION_FILE_PATH,
                                        vocab=vocab,
                                        transform=transform,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        num_workers=num_workers)

    IMAGES_PATH = 'data/val2014'
    CAPTION_FILE_PATH = 'data/annotations/captions_val2014.json'
    val_loader = get_coco_data_loader(path=IMAGES_PATH,
                                      json=CAPTION_FILE_PATH,
                                      vocab=vocab,
                                      transform=transform,
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers)

    losses_val = []
    losses_train = []

    # Build the models
    ngpu = 1
    initial_step = initial_epoch = 0
    embed_size = args.embed_size
    num_hiddens = args.num_hidden
    learning_rate = 1e-3
    num_epochs = 3
    log_step = args.log_step
    save_step = 500
    checkpoint_dir = args.checkpoint_dir

    encoder = CNN(embed_size)
    decoder = RNN(embed_size,
                  num_hiddens,
                  len(vocab),
                  1,
                  rec_unit=args.rec_unit)

    # Loss
    criterion = nn.CrossEntropyLoss()

    if args.checkpoint_file:
        encoder_state_dict, decoder_state_dict, optimizer, *meta = utils.load_models(
            args.checkpoint_file, args.sample)
        initial_step, initial_epoch, losses_train, losses_val = meta
        encoder.load_state_dict(encoder_state_dict)
        decoder.load_state_dict(decoder_state_dict)
    else:
        # Only the decoder and the encoder's linear/batchnorm layers are
        # handed to the optimizer.
        params = list(decoder.parameters()) + list(
            encoder.linear.parameters()) + list(encoder.batchnorm.parameters())
        optimizer = torch.optim.Adam(params, lr=learning_rate)

    if torch.cuda.is_available():
        encoder.cuda()
        decoder.cuda()

    if args.sample:
        return utils.sample(encoder, decoder, vocab, val_loader)

    # Bind the names used by the final save up front: if the loop is
    # interrupted (or has nothing to iterate) before the first step, the
    # ``finally`` block would otherwise fail with a NameError.
    step, epoch = initial_step, initial_epoch

    # Train the Models
    try:
        for epoch in range(initial_epoch, num_epochs):

            # NOTE(review): ``start=initial_step`` offsets the step counter
            # in every epoch, not only the resumed one, and does not skip
            # batches -- confirm this is intended.
            for step, (images, captions,
                       lengths) in enumerate(train_loader,
                                             start=initial_step):

                # Set mini-batch dataset
                images = utils.to_var(images, volatile=True)
                captions = utils.to_var(captions)
                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]

                # Forward, Backward and Optimize
                decoder.zero_grad()
                encoder.zero_grad()

                if ngpu > 1:
                    # run on multiple GPU
                    # NOTE(review): the decoder is called without captions
                    # and lengths here, unlike the single-GPU path.
                    features = nn.parallel.data_parallel(
                        encoder, images, range(ngpu))
                    outputs = nn.parallel.data_parallel(
                        decoder, features, range(ngpu))
                else:
                    # run on single GPU
                    features = encoder(images)
                    outputs = decoder(features, captions, lengths)

                train_loss = criterion(outputs, targets)
                losses_train.append(train_loss.data[0])
                train_loss.backward()
                optimizer.step()

                # Run validation set and predict
                if step % log_step == 0:
                    encoder.batchnorm.eval()
                    # run validation set
                    batch_loss_val = []
                    for images, captions, lengths in val_loader:
                        images = utils.to_var(images, volatile=True)
                        captions = utils.to_var(captions, volatile=True)

                        targets = pack_padded_sequence(captions,
                                                       lengths,
                                                       batch_first=True)[0]
                        features = encoder(images)
                        outputs = decoder(features, captions, lengths)
                        val_loss = criterion(outputs, targets)
                        batch_loss_val.append(val_loss.data[0])

                    losses_val.append(np.mean(batch_loss_val))

                    # predict (uses the most recently computed features and
                    # captions)
                    sampled_ids = decoder.sample(features)
                    sampled_ids = sampled_ids.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(sampled_ids, vocab)
                    print('Sample:', sentence)

                    true_ids = captions.cpu().data.numpy()[0]
                    sentence = utils.convert_back_to_text(true_ids, vocab)
                    print('Target:', sentence)

                    print(
                        'Epoch: {} - Step: {} - Train Loss: {} - Eval Loss: {}'
                        .format(epoch, step, losses_train[-1],
                                losses_val[-1]))
                    encoder.batchnorm.train()

                # Save the models
                if (step + 1) % save_step == 0:
                    utils.save_models(encoder, decoder, optimizer, step,
                                      epoch, losses_train, losses_val,
                                      checkpoint_dir)
                    utils.dump_losses(
                        losses_train, losses_val,
                        os.path.join(checkpoint_dir, 'losses.pkl'))

    except KeyboardInterrupt:
        pass
    finally:
        # Do final save
        utils.save_models(encoder, decoder, optimizer, step, epoch,
                          losses_train, losses_val, checkpoint_dir)
        utils.dump_losses(losses_train, losses_val,
                          os.path.join(checkpoint_dir, 'losses.pkl'))
def post_process(oriImg, canvas, paf_avg, all_peak_idxs, nonzero_vals,
                 dont_draw):
    """Assemble detected peaks into skeletons and return or draw them.

    Peaks are grouped per joint, candidate limbs along ``limbSeq`` are
    scored with the part-affinity fields in ``paf_avg``, accepted
    connections are merged into one row per person, and weak rows are
    dropped.

    Args:
        oriImg: source image; only ``oriImg.shape[0]`` is read (for the
            limb-length prior).
        canvas: image that is copied and drawn on when ``dont_draw`` is
            false.
        paf_avg: part-affinity fields indexed as ``[channel, row, col]``.
        all_peak_idxs: 2-D integer array, one row per peak; column 0 is the
            joint index, columns 2 and 1 are stored as the peak's first and
            second coordinate.
        nonzero_vals: per-peak score, aligned with ``all_peak_idxs``.
        dont_draw: when true, return the skeletons as data instead of
            drawing.

    Returns:
        If ``dont_draw``: ``{'found_ppl': [skel, ...]}`` where each ``skel``
        maps ``bone_map[i]`` to a pair of endpoint coordinate tuples.
        Otherwise: a copy of ``canvas`` with keypoints, limbs and limb
        indices drawn on it.
    """
    all_peaks = [[] for _ in range(N_JOINTS)]
    for i in range(all_peak_idxs.shape[0]):
        all_peaks[all_peak_idxs[i, 0]].append(
            (all_peak_idxs[i, 2], all_peak_idxs[i, 1], nonzero_vals[i], i))

    # ---- score candidate limbs with the PAFs -----------------------------
    connection_all = []
    special_k = []
    mid_num = 10

    for k in range(len(mapIdx)):
        idxs = [x - 19 for x in mapIdx[k]]
        score_mid = paf_avg[idxs, :, :]
        candA = all_peaks[limbSeq[k][0] - 1]
        candB = all_peaks[limbSeq[k][1] - 1]
        nA = len(candA)
        nB = len(candB)
        if nA != 0 and nB != 0:
            connection_candidate = []
            for i in range(nA):
                for j in range(nB):
                    vec = np.subtract(candB[j][:2], candA[i][:2])
                    norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                    if norm == 0:
                        # Coincident candidates cannot form a limb.
                        continue
                    vec = np.divide(vec, norm)

                    # mid_num sample points on the segment A -> B; ``list``
                    # keeps ``len`` working when ``zip`` is an iterator.
                    startend = list(
                        zip(np.linspace(candA[i][0], candB[j][0],
                                        num=mid_num),
                            np.linspace(candA[i][1], candB[j][1],
                                        num=mid_num)))

                    vec_x = np.zeros(len(startend))
                    vec_y = np.zeros(len(startend))
                    for I, pt in enumerate(startend):
                        row = int(round(pt[1]))
                        col = int(round(pt[0]))
                        vec_x[I] = score_mid[0, row, col]
                        vec_y[I] = score_mid[1, row, col]

                    # PAF projected onto the unit vector A -> B.
                    score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(
                        vec_y, vec[1])
                    # Mean score, penalised when the limb is longer than
                    # half the image height.
                    score_with_dist_prior = np.sum(score_midpts) / len(
                        score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                    criterion1 = len(
                        np.nonzero(score_midpts > param_['thre2'])
                        [0]) > 0.8 * len(score_midpts)
                    criterion2 = score_with_dist_prior > 0
                    if criterion1 and criterion2:
                        connection_candidate.append([
                            i, j, score_with_dist_prior,
                            score_with_dist_prior + candA[i][2] + candB[j][2]
                        ])

            # Greedy matching: best score first, each endpoint used once.
            connection_candidate = sorted(connection_candidate,
                                          key=get_third_item,
                                          reverse=True)
            connection = np.zeros((0, 5))
            for c in range(len(connection_candidate)):
                i, j, s = connection_candidate[c][0:3]
                if i not in connection[:, 3] and j not in connection[:, 4]:
                    connection = np.vstack(
                        [connection, [candA[i][3], candB[j][3], s, i, j]])
                    if len(connection) >= min(nA, nB):
                        break

            connection_all.append(connection)
        else:
            special_k.append(k)
            connection_all.append([])

    # ---- assemble connections into people --------------------------------
    # last number in each row is the total parts number of that person
    # the second last number in each row is the score of the overall
    # configuration
    subset = -1 * np.ones((0, 20))
    candidate = np.asarray(
        [item for sublist in all_peaks for item in sublist])

    for k in range(len(mapIdx)):
        if k in special_k:
            continue
        partAs = connection_all[k][:, 0]
        partBs = connection_all[k][:, 1]
        indexA, indexB = np.array(limbSeq[k]) - 1

        for i in range(len(connection_all[k])):
            found = 0
            subset_idx = [-1, -1]
            for j in range(len(subset)):
                if subset[j][indexA] == partAs[i] or subset[j][
                        indexB] == partBs[i]:
                    subset_idx[found] = j
                    found += 1

            if found == 1:
                j = subset_idx[0]
                if subset[j][indexB] != partBs[i]:
                    subset[j][indexB] = partBs[i]
                    subset[j][-1] += 1
                    subset[j][-2] += candidate[partBs[i].astype(int),
                                               2] + connection_all[k][i][2]
            elif found == 2:  # if found 2 and disjoint, merge them
                j1, j2 = subset_idx
                membership = ((subset[j1] >= 0).astype(int) +
                              (subset[j2] >= 0).astype(int))[:-2]
                if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                    subset[j1][:-2] += (subset[j2][:-2] + 1)
                    subset[j1][-2:] += subset[j2][-2:]
                    subset[j1][-2] += connection_all[k][i][2]
                    subset = np.delete(subset, j2, 0)
                else:  # as like found == 1
                    subset[j1][indexB] = partBs[i]
                    subset[j1][-1] += 1
                    subset[j1][-2] += candidate[partBs[i].astype(int),
                                                2] + connection_all[k][i][2]

            # if find no partA in the subset, create a new subset
            elif not found and k < 17:
                row = -1 * np.ones(20)
                row[indexA] = partAs[i]
                row[indexB] = partBs[i]
                row[-1] = 2
                row[-2] = np.sum(
                    candidate[connection_all[k][i, :2].astype(int),
                              2]) + connection_all[k][i][2]
                subset = np.vstack([subset, row])

    # delete some rows of subset which has few parts occur
    deleteIdx = [
        i for i in range(len(subset))
        if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4
    ]
    subset = np.delete(subset, deleteIdx, axis=0)

    N_BONES = N_JOINTS - 1

    # ---- data-only result --------------------------------------------------
    if dont_draw:
        res = {'found_ppl': []}
        for n in range(len(subset)):
            skel = {}
            for i in range(N_BONES):
                index = subset[n][np.array(limbSeq[i]) - 1]
                if -1 in index:
                    continue
                Y = candidate[index.astype(int), 0]
                X = candidate[index.astype(int), 1]
                bone_name = bone_map[i]
                skel[bone_name] = ((X[0], Y[0]), (X[1], Y[1]))
            res['found_ppl'].append(skel)
        return res

    # ---- drawing ----------------------------------------------------------
    p = time.time()
    canvas = canvas.copy()
    for i in range(N_JOINTS):
        for j in range(len(all_peaks[i])):
            cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i],
                       thickness=-1)

    stickwidth = 4

    for i in range(N_BONES):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i]) - 1]
            if -1 in index:
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly(
                (int(mY), int(mX)), (int(length / 2), stickwidth),
                int(angle), 0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            # Limb index label: white outline first, black text on top.
            cv2.putText(canvas, str(i), (int(mY), int(mX)),
                        cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 3)
            cv2.putText(canvas, str(i), (int(mY), int(mX)),
                        cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
    print('drawing took: ', time.time() - p)

    return canvas
def handle_one(oriImg):
    """Estimate poses in one image and return a copy with them drawn.

    The image is rescaled by ``model_['boxsize'] / height``, padded with
    ``util.padRightDownCorner`` and pushed through the module-level
    ``model`` on the GPU.  Both network outputs are upsampled back to the
    input resolution; the second one is used as per-part heatmaps and the
    first one as part-affinity fields (PAFs).  Heatmap peaks of the 18 parts
    become keypoint candidates, candidate pairs along ``limbSeq`` are scored
    with the PAFs, accepted connections are merged into one row per person,
    weak rows are dropped, and keypoints and limbs are drawn.

    Args:
        oriImg: image array indexed as (rows, cols, channels); presumably a
            BGR image as loaded by OpenCV -- TODO confirm with callers.

    Returns:
        A copy of ``oriImg`` with keypoints and limbs drawn on it.
    """
    # for visualize
    canvas = np.copy(oriImg)
    print(oriImg.shape)
    scale = model_['boxsize'] / float(oriImg.shape[0])
    print(scale)

    imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale,
                             interpolation=cv2.INTER_CUBIC)
    imageToTest_padded, _ = util.padRightDownCorner(
        imageToTest, model_['stride'], model_['padValue'])
    # (rows, cols, channels) -> (1, channels, rows, cols), scaled to about
    # [-0.5, 0.5).
    imageToTest_padded = np.transpose(
        np.float32(imageToTest_padded[:, :, :, np.newaxis]),
        (3, 2, 0, 1)) / 256 - 0.5

    feed = Variable(T.from_numpy(imageToTest_padded)).cuda()
    output1, output2 = model(feed)

    out_size = (oriImg.shape[0], oriImg.shape[1])
    heatmap = nn.UpsamplingBilinear2d(out_size).cuda()(output2)
    paf = nn.UpsamplingBilinear2d(out_size).cuda()(output1)
    print(heatmap.size())
    print(paf.size())
    print(type(heatmap))

    # Channels-first tensors -> channels-last numpy arrays.
    heatmap_avg = T.transpose(T.transpose(heatmap[0], 0, 1), 1,
                              2).data.cpu().numpy()
    paf_avg = T.transpose(T.transpose(paf[0], 0, 1), 1, 2).data.cpu().numpy()

    # ---- keypoint candidates: local maxima of each smoothed heatmap ------
    all_peaks = []
    peak_counter = 0
    for part in range(18):
        map_ori = heatmap_avg[:, :, part]
        smoothed = gaussian_filter(map_ori, sigma=3)

        map_left = np.zeros(smoothed.shape)
        map_left[1:, :] = smoothed[:-1, :]
        map_right = np.zeros(smoothed.shape)
        map_right[:-1, :] = smoothed[1:, :]
        map_up = np.zeros(smoothed.shape)
        map_up[:, 1:] = smoothed[:, :-1]
        map_down = np.zeros(smoothed.shape)
        map_down[:, :-1] = smoothed[:, 1:]

        peaks_binary = np.logical_and.reduce(
            (smoothed >= map_left, smoothed >= map_right,
             smoothed >= map_up, smoothed >= map_down,
             smoothed > param_['thre1']))
        rows, cols = np.nonzero(peaks_binary)
        # note reverse: peaks are stored as (column, row).  ``list`` keeps
        # ``len`` working when ``zip`` returns an iterator (Python 3).
        peaks = list(zip(cols, rows))
        peaks_with_score = [
            peak + (map_ori[peak[1], peak[0]], ) for peak in peaks
        ]
        # Append a running id that is unique across all parts.
        peaks_with_score_and_id = [
            peak + (peak_counter + offset, )
            for offset, peak in enumerate(peaks_with_score)
        ]

        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)

    # ---- score candidate limbs with the PAFs -----------------------------
    connection_all = []
    special_k = []
    mid_num = 10

    for k in range(len(mapIdx)):
        score_mid = paf_avg[:, :, [x - 19 for x in mapIdx[k]]]
        candA = all_peaks[limbSeq[k][0] - 1]
        candB = all_peaks[limbSeq[k][1] - 1]
        nA = len(candA)
        nB = len(candB)
        if nA != 0 and nB != 0:
            connection_candidate = []
            for i in range(nA):
                for j in range(nB):
                    vec = np.subtract(candB[j][:2], candA[i][:2])
                    norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
                    if norm == 0:
                        # Coincident candidates: no direction to score and
                        # the distance prior below would divide by zero.
                        continue
                    vec = np.divide(vec, norm)

                    # mid_num sample points on the segment A -> B.
                    startend = list(
                        zip(np.linspace(candA[i][0], candB[j][0],
                                        num=mid_num),
                            np.linspace(candA[i][1], candB[j][1],
                                        num=mid_num)))

                    vec_x = np.array([
                        score_mid[int(round(pt[1])), int(round(pt[0])), 0]
                        for pt in startend
                    ])
                    vec_y = np.array([
                        score_mid[int(round(pt[1])), int(round(pt[0])), 1]
                        for pt in startend
                    ])

                    # PAF projected onto the unit vector A -> B.
                    score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(
                        vec_y, vec[1])
                    # Mean score, penalised when the limb is longer than
                    # half the image height.
                    score_with_dist_prior = sum(score_midpts) / len(
                        score_midpts) + min(
                            0.5 * oriImg.shape[0] / norm - 1, 0)
                    criterion1 = len(
                        np.nonzero(score_midpts > param_['thre2'])
                        [0]) > 0.8 * len(score_midpts)
                    criterion2 = score_with_dist_prior > 0
                    if criterion1 and criterion2:
                        connection_candidate.append([
                            i, j, score_with_dist_prior,
                            score_with_dist_prior + candA[i][2] + candB[j][2]
                        ])

            # Greedy matching: best score first, each endpoint used once.
            connection_candidate = sorted(connection_candidate,
                                          key=lambda x: x[2],
                                          reverse=True)
            connection = np.zeros((0, 5))
            for c in range(len(connection_candidate)):
                i, j, s = connection_candidate[c][0:3]
                if i not in connection[:, 3] and j not in connection[:, 4]:
                    connection = np.vstack(
                        [connection, [candA[i][3], candB[j][3], s, i, j]])
                    if len(connection) >= min(nA, nB):
                        break

            connection_all.append(connection)
        else:
            special_k.append(k)
            connection_all.append([])

    # ---- assemble connections into people --------------------------------
    # last number in each row is the total parts number of that person
    # the second last number in each row is the score of the overall
    # configuration
    subset = -1 * np.ones((0, 20))
    candidate = np.array([item for sublist in all_peaks for item in sublist])

    for k in range(len(mapIdx)):
        if k in special_k:
            continue
        partAs = connection_all[k][:, 0]
        partBs = connection_all[k][:, 1]
        indexA, indexB = np.array(limbSeq[k]) - 1

        for i in range(len(connection_all[k])):
            found = 0
            subset_idx = [-1, -1]
            for j in range(len(subset)):
                if subset[j][indexA] == partAs[i] or subset[j][
                        indexB] == partBs[i]:
                    subset_idx[found] = j
                    found += 1

            if found == 1:
                j = subset_idx[0]
                if subset[j][indexB] != partBs[i]:
                    subset[j][indexB] = partBs[i]
                    subset[j][-1] += 1
                    subset[j][-2] += candidate[partBs[i].astype(int),
                                               2] + connection_all[k][i][2]
            elif found == 2:  # if found 2 and disjoint, merge them
                j1, j2 = subset_idx
                print("found = 2")
                membership = ((subset[j1] >= 0).astype(int) +
                              (subset[j2] >= 0).astype(int))[:-2]
                if len(np.nonzero(membership == 2)[0]) == 0:  # merge
                    subset[j1][:-2] += (subset[j2][:-2] + 1)
                    subset[j1][-2:] += subset[j2][-2:]
                    subset[j1][-2] += connection_all[k][i][2]
                    subset = np.delete(subset, j2, 0)
                else:  # as like found == 1
                    subset[j1][indexB] = partBs[i]
                    subset[j1][-1] += 1
                    subset[j1][-2] += candidate[partBs[i].astype(int),
                                                2] + connection_all[k][i][2]

            # if find no partA in the subset, create a new subset
            elif not found and k < 17:
                row = -1 * np.ones(20)
                row[indexA] = partAs[i]
                row[indexB] = partBs[i]
                row[-1] = 2
                row[-2] = sum(
                    candidate[connection_all[k][i, :2].astype(int),
                              2]) + connection_all[k][i][2]
                subset = np.vstack([subset, row])

    # delete some rows of subset which has few parts occur
    deleteIdx = [
        i for i in range(len(subset))
        if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4
    ]
    subset = np.delete(subset, deleteIdx, axis=0)

    # ---- drawing ----------------------------------------------------------
    for i in range(18):
        for j in range(len(all_peaks[i])):
            cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i],
                       thickness=-1)

    stickwidth = 4

    for i in range(17):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i]) - 1]
            if -1 in index:
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1])**2 + (Y[0] - Y[1])**2)**0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly(
                (int(mY), int(mX)), (int(length / 2), stickwidth),
                int(angle), 0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

    return canvas
# Draw the detected keypoints and limbs onto the test image, report the
# elapsed time since ``tic`` and write the result to ``result.png``.
canvas = cv2.imread(test_image)  # B,G,R order

# One filled dot per detected peak, coloured by part index.
for i in range(18):
    for j in range(len(all_peaks[i])):
        cv2.circle(canvas, all_peaks[i][j][0:2], 4, colors[i], thickness=-1)

stickwidth = 4

# One blended ellipse per limb of every assembled person.
for i in range(17):
    for n in range(len(subset)):
        index = subset[n][np.array(limbSeq[i]) - 1]
        if -1 in index:
            # At least one endpoint of this limb was not detected.
            continue
        cur_canvas = canvas.copy()
        Y = candidate[index.astype(int), 0]
        X = candidate[index.astype(int), 1]
        mX = np.mean(X)
        mY = np.mean(Y)
        length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
        polygon = cv2.ellipse2Poly((int(mY), int(mX)),
                                   (int(length / 2), stickwidth),
                                   int(angle), 0, 360, 1)
        cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
        canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

#Parallel(n_jobs=1)(delayed(handle_one)(i) for i in range(18))

toc = time.time()
# Single parenthesised argument: prints the same text on Python 2 and 3.
print('time is %.5f' % (toc - tic))

cv2.imwrite('result.png', canvas)