import numpy as np
import scipy.signal
# compute_ccc, shift_annotations_to_back and postprocess_labels are project helpers
# assumed to be importable in this module.


def evaluate_partition(pred, num_samples_down, orig):
    # pred: (num_instances, max_seq_len, 1)
    # num_samples_down: constant length after downsampling (given the constant input length)
    # Evaluate against the original labels in two ways: globally and per segment.
    CCC_orig = np.zeros(pred.shape[2])
    CCC_orig_seq = np.zeros(pred.shape[2])
    pred_list = []  # required by the label postprocessing script
    orig_list = []  # required by the label postprocessing script - only one target
    CCC_seq = np.array([])
    predAll = np.array([])
    origAll = np.array([])
    for m in range(pred.shape[0]):
        # resample predictions to the original label rate
        predDown = scipy.signal.resample(pred[m, :], num_samples_down)
        # crop or pad to the length of the original label sequence
        lenOrig = len(orig[m])
        if len(predDown) > lenOrig:
            predDown = predDown[:lenOrig]
        elif len(predDown) < lenOrig:
            predDown = np.concatenate((predDown, np.zeros(lenOrig - len(predDown))))
        # segment-wise evaluation
        CCC = compute_ccc(predDown.flatten(), orig[m].flatten())
        CCC_seq = np.append(CCC_seq, CCC)
        # global evaluation
        predAll = np.append(predAll, predDown)
        origAll = np.append(origAll, orig[m])
        # collect sequences for the postprocessing script
        pred_list.append(predDown.flatten())
        orig_list.append(orig[m].flatten())
    CCC_orig = compute_ccc(predAll, origAll)  # global
    CCC_orig_seq = np.mean(CCC_seq)           # segment average
    return CCC_orig, CCC_orig_seq, pred_list, orig_list
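# The helper compute_ccc is not defined in this file. Below is a minimal sketch of the
# concordance correlation coefficient (CCC) it is assumed to compute; the project's own
# compute_ccc may differ in naming and in how it handles edge cases such as constant
# sequences.
import numpy as np


def compute_ccc(x, y):
    # Lin's concordance correlation coefficient between two 1-D arrays of equal length
    x_mean, y_mean = np.mean(x), np.mean(y)
    cov = np.mean((x - x_mean) * (y - y_mean))
    return 2 * cov / (np.var(x) + np.var(y) + (x_mean - y_mean) ** 2)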
def evaluate_all(model, trainX, develX, testX, trainY, develY, testY,
                 origTrain, origDevel, origTest, shift, factor, num_targets):
    CCC_train = np.zeros(num_targets)
    CCC_devel = np.zeros(num_targets)
    CCC_test = np.zeros(num_targets)
    CCC_orig_train = np.zeros(num_targets)
    CCC_orig_devel = np.zeros(num_targets)
    CCC_orig_test = np.zeros(num_targets)
    CCC_orig_train_seq = np.zeros(num_targets)
    CCC_orig_devel_seq = np.zeros(num_targets)
    CCC_orig_test_seq = np.zeros(num_targets)
    CCC_orig_devel_pp = np.zeros(num_targets)
    CCC_orig_test_pp = np.zeros(num_targets)
    # Get predictions
    predYtrain = model.predict(trainX)
    predYdevel = model.predict(develX)
    predYtest = model.predict(testX)
    if num_targets == 1:
        # In this case, model.predict() does not return a list, which is required below
        predYtrain = [predYtrain]
        predYdevel = [predYdevel]
        predYtest = [predYtest]
    # Evaluation at the upsampled rate with padded sequences
    for k in range(num_targets):  # loop over target dimensions (arousal, valence, liking)
        CCC_train[k] = compute_ccc(predYtrain[k].flatten(), trainY[k].flatten())
        CCC_devel[k] = compute_ccc(predYdevel[k].flatten(), develY[k].flatten())
        CCC_test[k] = compute_ccc(predYtest[k].flatten(), testY[k].flatten())
    # Evaluation with downsampling against the original labels
    # First shift the predictions back to compensate the annotation delay
    for k in range(num_targets):
        predYtrain[k] = shift_annotations_to_back(predYtrain[k], shift)
        predYdevel[k] = shift_annotations_to_back(predYdevel[k], shift)
        predYtest[k] = shift_annotations_to_back(predYtest[k], shift)
    num_samples_down = int(np.round(float(trainY[0].shape[1]) / factor))
    for k in range(num_targets):
        CCC_orig_train[k], CCC_orig_train_seq[k], _, _ = evaluate_partition(
            predYtrain[k], num_samples_down, origTrain[k])
        CCC_orig_devel[k], CCC_orig_devel_seq[k], pred_list_devel, orig_list_devel = evaluate_partition(
            predYdevel[k], num_samples_down, origDevel[k])
        CCC_orig_test[k], CCC_orig_test_seq[k], pred_list_test, orig_list_test = evaluate_partition(
            predYtest[k], num_samples_down, origTest[k])
        # With postprocessed labels: tune on devel, apply to test
        CCC_pp_devel, best_param = postprocess_labels.train(orig_list_devel, pred_list_devel)
        CCC_pp_test = postprocess_labels.predict(orig_list_test, pred_list_test, best_param)
        CCC_orig_devel_pp[k] = CCC_pp_devel[-1]
        CCC_orig_test_pp[k] = CCC_pp_test[-1]
    return (CCC_train, CCC_devel, CCC_test,
            CCC_orig_train, CCC_orig_devel, CCC_orig_test,
            CCC_orig_train_seq, CCC_orig_devel_seq, CCC_orig_test_seq,
            CCC_orig_devel_pp, CCC_orig_test_pp)
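# Hypothetical usage of evaluate_all(). The model, the feature/label arrays, the
# annotation delay (shift, in frames), the downsampling factor and the target names
# below are placeholders for illustration only, not values prescribed by this code.
#
# results = evaluate_all(model, trainX, develX, testX, trainY, develY, testY,
#                        origTrain, origDevel, origTest, shift=16, factor=4, num_targets=3)
# (CCC_train, CCC_devel, CCC_test,
#  CCC_orig_train, CCC_orig_devel, CCC_orig_test,
#  CCC_orig_train_seq, CCC_orig_devel_seq, CCC_orig_test_seq,
#  CCC_orig_devel_pp, CCC_orig_test_pp) = results
# for k, name in enumerate(['arousal', 'valence', 'liking']):
#     print(name, CCC_orig_devel[k], CCC_orig_devel_pp[k])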
def predict(gold, pred, best_param):
    # gold/pred: list of numpy arrays, each with 1 dimension (sequence_length,)
    #   e.g., gold = [np.array([.5,.3,.6,.4,.7,.3]), np.array([-.1,.2,-.2,.1])] for two sequences of length 6 and 4
    # best_param: [wmedian, bias, scale, shift] - obtained by train()
    # CCC after each step: raw, filter, center, scale, shift
    CCC_save = np.zeros(5)
    gold_flat = flatten(gold)
    # performance on raw predictions
    CCC_save[0] = compute_ccc(gold_flat, flatten(pred))
    # apply median filtering
    if best_param[0] > 0:
        kernel = best_param[0]
        pred = median_filter(pred, kernel, copy=False)
        CCC_save[1] = compute_ccc(gold_flat, flatten(pred))
    else:
        CCC_save[1] = CCC_save[0]
    # apply centering
    if best_param[1] != 0:
        for k in range(len(pred)):
            pred[k] = pred[k] + best_param[1]
        CCC_save[2] = compute_ccc(gold_flat, flatten(pred))
    else:
        CCC_save[2] = CCC_save[1]
    # apply scaling
    if best_param[2] != 1:
        for k in range(len(pred)):
            pred[k] = pred[k] * best_param[2]
        CCC_save[3] = compute_ccc(gold_flat, flatten(pred))
    else:
        CCC_save[3] = CCC_save[2]
    # apply shifting
    shift_optim = best_param[3]
    if shift_optim > 0:
        for k in range(len(pred)):
            pred[k] = np.concatenate((np.repeat(pred[k][0], shift_optim), pred[k][:-shift_optim]))
        CCC_save[4] = compute_ccc(gold_flat, flatten(pred))
    elif shift_optim < 0:
        shift_optim = -shift_optim
        for k in range(len(pred)):
            pred[k] = np.concatenate((pred[k][shift_optim:], np.repeat(pred[k][-1], shift_optim)))
        CCC_save[4] = compute_ccc(gold_flat, flatten(pred))
    else:
        CCC_save[4] = CCC_save[3]
    return CCC_save  # pred is filtered in place
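# flatten() and median_filter() are assumed by train()/predict() but not shown here.
# The sketches below are plausible stand-ins, not the repository's implementations;
# in particular, the handling of even kernel sizes is an assumption.
import numpy as np
import scipy.signal


def flatten(seqs):
    # concatenate a list of 1-D sequences into a single 1-D array
    return np.concatenate([np.asarray(s).flatten() for s in seqs])


def median_filter(seqs, kernel, copy=True):
    # apply a running median of width 'kernel' to every sequence in the list
    kernel = int(kernel)
    if kernel % 2 == 0:
        kernel += 1  # scipy.signal.medfilt requires an odd kernel size
    out = [s.copy() for s in seqs] if copy else seqs
    for i in range(len(out)):
        out[i] = scipy.signal.medfilt(np.asarray(out[i]).flatten(), kernel)
    return out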
# Fragment of an evaluation script: pred_list, orig_list, X, pY and orig_Y are assumed
# to be defined earlier in the script.
CCC_seq = []
predAll = np.array([])
origAll = np.array([])
for subjectID in range(X.shape[0]):
    # predictions are already at the target rate, so no scipy.signal.resample here
    predDown = pY[subjectID, :]
    # crop or pad to the length of the original label sequence
    lenOrig = len(orig_Y[subjectID])
    if len(predDown) > lenOrig:
        predDown = predDown[:lenOrig]
    elif len(predDown) < lenOrig:
        predDown = np.concatenate((predDown, np.zeros(lenOrig - len(predDown))))
    # segment-wise evaluation
    CCC = compute_ccc(predDown.flatten(), orig_Y[subjectID].flatten())
    CCC_seq.append(CCC)
    # global evaluation
    predAll = np.append(predAll, predDown)
    origAll = np.append(origAll, orig_Y[subjectID])
    # collect sequences for the postprocessing script
    pred_list.append(predDown.flatten())
    orig_list.append(orig_Y[subjectID].flatten())
CCC_orig = compute_ccc(predAll, origAll)  # global
def train(gold, pred, Nw=100, wstep=4, Nshift=200):
    # gold/pred: list of numpy arrays, each with 1 dimension (sequence_length,)
    #   e.g., gold = [np.array([.5,.3,.6,.4,.7,.3]), np.array([-.1,.2,-.2,.1])] for two sequences of length 6 and 4
    # Nw=100, wstep=4, Nshift=200: adapted for a hop size of 0.1s
    # CCC after each step: raw, filter, center, scale, shift
    CCC_save = np.zeros(5)
    # best_param: [wmedian, bias, scale, shift]
    best_param = []
    # flatten gold standard and predictions
    gold_flat = flatten(gold)
    # performance on raw predictions
    CCC_save[0] = compute_ccc(gold_flat, flatten(pred))
    best_CCC = CCC_save[0]

    ## search median-filter width by dichotomy (makes computation faster)
    perc_impr = 2.
    new_val = np.zeros(4)
    new_val[0] = CCC_save[0]
    for k in range(0, 3):
        kernel = (k + 1) * Nw // 3
        pred_filt = median_filter(pred, kernel)
        new_val[k + 1] = compute_ccc(gold_flat, flatten(pred_filt))
    val = np.flip(np.sort(new_val), axis=0)
    indw = np.flip(np.argsort(new_val), axis=0)
    if indw[0] != 0 and 100 * (val[0] - new_val[0]) / new_val[0] > perc_impr:
        # filtering useful - perform a second round of dichotomy
        indw = indw[0:2] * Nw // 3
        new_indw = [indw[0], int(np.round(np.mean(indw))), indw[1]]
        kernel = new_indw[1]
        pred_filt = median_filter(pred, kernel)
        new_val = [val[0], compute_ccc(gold_flat, flatten(pred_filt)), val[1]]
        # continue the dichotomy if there is still improvement
        if 100 * (new_val[1] - max(new_val[0], new_val[2])) / max(new_val[0], new_val[2]) > perc_impr:
            eot = True
            while eot:
                val = np.flip(np.sort(new_val), axis=0)
                indw = np.flip(np.argsort(new_val), axis=0)
                new_indw = [
                    new_indw[indw[0]],
                    int(np.round(np.mean([new_indw[indw[0]], new_indw[indw[1]]]))),
                    new_indw[indw[1]]
                ]
                kernel = new_indw[1]
                pred_filt = median_filter(pred, kernel)
                new_val = [val[0], compute_ccc(gold_flat, flatten(pred_filt)), val[1]]
                # if there is still improvement
                if 100 * (new_val[1] - max(new_val[0], new_val[2])) / max(new_val[0], new_val[2]) > 0.:
                    if len(best_param) == 0:  # TODO: not so nice
                        best_param.append(new_indw[1])
                    else:  # TODO
                        best_param[0] = new_indw[1]
                    best_CCC = new_val[1]
                else:
                    if len(best_param) == 0:  # TODO: not so nice
                        best_param.append(new_indw[0])
                    else:  # TODO
                        best_param[0] = new_indw[0]
                    best_CCC = val[0]
                    eot = False
        else:
            if 100 * (new_val[1] - max(new_val[0], new_val[2])) / max(new_val[0], new_val[2]) > 0.:
                best_param.append(new_indw[1])
                best_CCC = new_val[1]
            else:
                best_param.append(indw[0])
                best_CCC = val[0]
    else:
        best_param.append(0)
    # apply median filtering with the selected width
    if best_param[-1] > 1:
        pred = median_filter(pred, best_param[-1], copy=False)
    CCC_save[1] = best_CCC

    ## center prediction
    pred_flat = flatten(pred)  # flatten latest (filtered) predictions
    mean_gold = np.mean(gold_flat)
    mean_pred = np.mean(pred_flat)
    bias = mean_gold - mean_pred
    pred_center = pred_flat + bias
    CCC_tmp = compute_ccc(gold_flat, pred_center)
    # save configuration if it improves the CCC
    if CCC_tmp > best_CCC:
        best_param.append(bias)
        best_CCC = CCC_tmp
        # apply the bias to all sequences
        for ind in range(0, len(pred)):
            pred[ind] = pred[ind] + bias
    else:
        best_param.append(0)
    CCC_save[2] = best_CCC

    ## scale prediction
    pred_flat = flatten(pred)  # flatten latest (filtered, centered) predictions
    std_gold = np.std(gold_flat)
    std_pred = np.std(pred_flat)
    scale = std_gold / std_pred
    pred_scale = pred_flat * scale
    CCC_tmp = compute_ccc(gold_flat, pred_scale)
    # save configuration if it improves the CCC
    if CCC_tmp > best_CCC:
        best_param.append(scale)
        best_CCC = CCC_tmp
        # apply the scaling to all sequences
        for ind in range(0, len(pred)):
            pred[ind] = pred[ind] * scale
    else:
        best_param.append(1)
    CCC_save[3] = best_CCC

    ## shift prediction backward / forward
    CCC_tmp = np.zeros(2 * Nshift // wstep + 1)
    CCC_tmp[Nshift // wstep] = best_CCC
    for shift in range(1, Nshift // wstep + 1):
        tmp_flat = np.empty(0)
        for seq in pred:
            tmp = np.concatenate((seq[shift * wstep:], np.repeat(seq[-1], shift * wstep)))
            tmp_flat = np.concatenate((tmp_flat, tmp))
        CCC_tmp[Nshift // wstep - shift] = compute_ccc(gold_flat, tmp_flat)
    for shift in range(1, Nshift // wstep + 1):
        tmp_flat = np.empty(0)
        for seq in pred:
            tmp = np.concatenate((np.repeat(seq[0], shift * wstep), seq[:-shift * wstep]))
            tmp_flat = np.concatenate((tmp_flat, tmp))
        CCC_tmp[Nshift // wstep + shift] = compute_ccc(gold_flat, tmp_flat)
    val = np.max(CCC_tmp)
    ind = np.argmax(CCC_tmp)
    # save configuration if it improves the CCC
    if val > best_CCC:
        shift_optim = (ind - Nshift // wstep) * wstep
        best_param.append(shift_optim)
        best_CCC = val
        if shift_optim > 0:
            for k in range(0, len(pred)):
                pred[k] = np.concatenate((np.repeat(pred[k][0], shift_optim), pred[k][:-shift_optim]))
        elif shift_optim < 0:
            shift_optim = -shift_optim
            for k in range(0, len(pred)):
                pred[k] = np.concatenate((pred[k][shift_optim:], np.repeat(pred[k][-1], shift_optim)))
    else:
        best_param.append(0)
    CCC_save[4] = best_CCC
    return CCC_save, best_param  # pred is filtered in place
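# Toy end-to-end check of the train()/predict() chain on synthetic data. The sine
# "gold standard" and the added lag, scaling, bias and noise are arbitrary choices for
# illustration and rely on the compute_ccc / flatten / median_filter sketches above;
# none of this is part of the original script.
import numpy as np

if __name__ == '__main__':
    rng = np.random.RandomState(0)
    gold = [0.5 * np.sin(np.linspace(0, 8 * np.pi, 2000)) for _ in range(3)]
    # corrupt the "predictions" so the postprocessing chain has something to undo
    pred = [np.roll(g, 40) * 0.7 + 0.2 + rng.normal(0, 0.05, g.shape) for g in gold]

    CCC_devel, best_param = train(gold, [p.copy() for p in pred])
    print('devel CCC after raw/filter/center/scale/shift:', CCC_devel)
    print('best_param [wmedian, bias, scale, shift]:', best_param)

    # apply the learned parameters to a held-out partition (reused here for brevity)
    CCC_test = predict(gold, [p.copy() for p in pred], best_param)
    print('test CCC after each step:', CCC_test)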
# NOTE: the opening of this model-loading call is truncated in the source; only the
# weights filename and the custom loss passed via custom_objects are preserved.
's42b128e8000d0.0r24.h5', custom_objects={'ccc_loss_2': ccc_loss_2})
predict = model.predict(testtX)
test_ccc_list = []
concat_test_predict = np.array([])
concat_test_ground = np.array([])
for cur_test_id, orig_test_id in enumerate(testtsamples):
    if orig_test_id not in [
            0, 2, 3, 4, 6, 10, 12, 13, 15, 18, 19, 20, 21, 23, 25, 27, 29, 39,
            40, 43, 45, 47, 50, 51, 52, 54, 55, 57, 59
    ]:
        # crop each prediction/ground-truth pair to the unpadded sequence length
        cur_predict = predict[cur_test_id][:total_vector_count[orig_test_id]].flatten()
        cur_ground = testtY[cur_test_id][:total_vector_count[orig_test_id]].flatten()
        concat_test_predict = np.append(concat_test_predict, cur_predict)
        concat_test_ground = np.append(concat_test_ground, cur_ground)
        # per-sequence CCC
        cur_ccc = compute_ccc(cur_ground, cur_predict)
        test_ccc_list.append(cur_ccc)
# CCC over all concatenated test sequences
concat_test_ccc = compute_ccc(concat_test_ground, concat_test_predict)
print(test_ccc_list, concat_test_ccc)
'''
for test_id in what_to_test:
    plt.plot(predict[test_id])
    plt.plot(testtY[test_id])
    plt.title("{}: {}".format(test_id, facs_list[test_id]))
    plt.show()
'''