def KL_divergence(resultrootdir, normal, mutant, layerlist, num_node=const.num_node):
    """
    compute the KL divergence between the normal and the mutant model of each node

    For every layer the pickled models of both classes are read from
    ``<resultrootdir>/<normal>_vs_<mutant>/gmm_model/<ID>/<layer>/node-<n>.pkl``
    and compared with ``gmm_kl_bysampling``. A node is skipped when either
    of its two model files is missing. One ``<layer>_kldiv.csv`` holding the
    ``[node, kldiv]`` rows is written per layer.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    num_node : int, optional
        the number of node in a hidden layer
    """
    tag = normal + '_vs_' + mutant
    tagdir = os.path.join(resultrootdir, tag)

    for layer in layerlist:
        print(layer)
        normal_modeldir = os.path.join(tagdir, 'gmm_model', normal, layer)
        mutant_modeldir = os.path.join(tagdir, 'gmm_model', mutant, layer)

        rows = []
        for node in range(num_node):
            node_name = 'node-' + str(node)
            print(node_name)
            normal_pkl = os.path.join(normal_modeldir, node_name + '.pkl')
            mutant_pkl = os.path.join(mutant_modeldir, node_name + '.pkl')
            # both models are needed for a comparison
            if not (os.path.exists(normal_pkl) and os.path.exists(mutant_pkl)):
                continue
            normal_gmm = joblib.load(normal_pkl)
            mutant_gmm = joblib.load(mutant_pkl)
            rows.append([node, gmm_kl_bysampling(normal_gmm, mutant_gmm)])

        np.savetxt(os.path.join(tagdir, layer + '_kldiv.csv'),
                   np.array(rows),
                   header=io_utils.delimited_list(['node', 'kldiv'], ' '))
def mean(resultrootdir, normal, mutant, layerlist, savebinary, normal_timesteps, mutant_timesteps):
    """
    per-node mean score of both classes and their differences

    For every layer the node scores of each class are averaged first along
    axis 1 of every trajectory and then over all trajectories. The result
    is written to ``<layer>_mean.csv`` with the columns node, mean of normal,
    mean of mutant, ``mutant - normal`` and ``normal - mutant``.
    Note that the last two column labels are spelled with '/' although the
    stored values are differences.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    normal_timesteps :
        forwarded to io_utils.get_nodescores for the normal worm
    mutant_timesteps :
        forwarded to io_utils.get_nodescores for the mutant worm
    """
    tag = normal + '_vs_' + mutant
    tagdir = os.path.join(resultrootdir, tag)
    column_names = [
        'node',
        'mean_' + normal,
        'mean_' + mutant,
        'mean_' + mutant + '/' + normal,
        'mean_' + normal + '/' + mutant,
    ]

    for layer in layerlist:
        print(layer)

        class_means = []
        for worm, timesteps in ((normal, normal_timesteps), (mutant, mutant_timesteps)):
            trajectories = io_utils.get_nodescores(
                os.path.join(tagdir, worm), layer, savebinary, timesteps)
            # time axis
            per_trajectory = np.array([np.mean(score, axis=1) for score in trajectories])
            # all trajectories
            class_means.append(np.mean(per_trajectory, axis=0))
        normal_mean, mutant_mean = class_means

        table = np.stack((
            np.arange(len(normal_mean)),
            normal_mean,
            mutant_mean,
            mutant_mean - normal_mean,
            normal_mean - mutant_mean,
        )).T

        np.savetxt(os.path.join(tagdir, layer + '_mean.csv'),
                   table,
                   header=io_utils.delimited_list(column_names, ' '))
def allfeature(excelfile, output_dir, start_index=0, prefix='', minrow=3, maxrow=np.inf, labelcol=None, skip=0.):
    """
    compute hand-crafted trajectory features of every sheet and save one CSV per sheet

    For each usable row (a row whose own and two preceding x / y / time values
    are all present) the speed, acceleration, turning angle and heading are
    derived from the last three positions. Every additional column of the
    sheet is exported together with its time derivative. Moving averages and
    moving variances (window of at most 10 points) of all these features are
    appended. The module-level ``current_sheet_for_error`` and
    ``current_line_for_error`` are updated while iterating so that a caller
    can report where a failure happened.

    Parameters
    =======================================
    excelfile : iterable of sheet
        sheets to process; each sheet must provide ``basename``, ``name``,
        ``nrows``, ``header`` and ``colist`` (rows with ``x``, ``y``,
        ``time``, ``p`` and ``dic``)
    output_dir : str
        the CSV files are stored in ``<output_dir>/<const.allfeature>``
    start_index : int, optional
        number used in the name of the first saved file
    prefix : str, optional
        prefix of the saved file names
    minrow : int, optional
        sheets with fewer rows are ignored
    maxrow : number, optional
        rows whose index is larger than this value are not read
    labelcol : int, optional
        index of the header column that holds a label; it is exported as
        'label' and not treated as a feature
    skip : float, optional
        rows whose time is smaller than this value are ignored

    Returns
    =======================================
    count : int
        the number of saved files
    """
    global current_sheet_for_error
    global current_line_for_error

    featuredir = os.path.join(output_dir, const.allfeature)
    os.makedirs(featuredir, exist_ok=True)

    count = 0
    for sheet in excelfile:
        current_sheet_for_error = sheet.basename + ' ' + sheet.name
        if sheet.nrows < minrow:
            continue

        timelist = np.array([])
        labellist = np.array([])
        speedlist = np.array([])
        accelerationlist = np.array([])
        accelerationlist_angle = np.array([])
        relative_anglelist = np.array([])
        anglelist = np.array([])

        # raw value and time derivative of every column except 'time' and the label
        other_featurelist = {}
        d_other_featurelist = {}
        for h, name in enumerate(sheet.header):
            if name == 'time' or h == labelcol:
                continue
            other_featurelist[name] = np.array([])
            d_other_featurelist[name] = np.array([])

        # distance from initial point, angle from initial point, travel distance
        additional_methodslist = [np.array([]), np.array([]), np.array([])]

        i0 = -1  # index of the first usable row (the "initial point")
        for i in range(sheet.nrows):
            current_line_for_error = i
            if i < 2:
                continue
            if i > maxrow:
                break

            row = sheet.colist[i]
            prev = sheet.colist[i - 1]
            prev2 = sheet.colist[i - 2]

            # pass if empty
            # This has to come before the comparison with `skip`: comparing a
            # missing (None) time with a number raises TypeError in Python 3.
            if any(p.x is None or p.y is None or p.time is None for p in (row, prev, prev2)):
                continue
            if row.time < skip:
                continue

            if i0 == -1:
                i0 = i
            origin = sheet.colist[i0]

            timelist = np.append(timelist, row.time)
            timediff = float(row.time - prev.time)

            # angle relative to previous
            v_ref = [prev.x - prev2.x, prev.y - prev2.y]
            v_t = [row.x - prev.x, row.y - prev.y]
            angle_t = angle(v_t, v_ref)
            relative_anglelist = np.append(relative_anglelist, angle_t / timediff)

            speedlist = np.append(speedlist, length(v_t) / timediff)

            acc, accangle = acceleration(prev2.p, prev.p, row.p, prev.time - prev2.time, timediff)
            accelerationlist = np.append(accelerationlist, acc)
            accelerationlist_angle = np.append(accelerationlist_angle, accangle)

            anglelist = np.append(anglelist, np.arctan2(v_t[1], v_t[0]) / timediff)

            # distance from initial point
            additional_methodslist[0] = np.append(
                additional_methodslist[0],
                length([row.x - origin.x, row.y - origin.y]))
            # angle from initial point
            additional_methodslist[1] = np.append(
                additional_methodslist[1],
                np.arctan2(row.y - origin.y, row.x - origin.x))
            # travel distance
            if len(additional_methodslist[2]) == 0:
                additional_methodslist[2] = np.append(additional_methodslist[2], 0)
            else:
                additional_methodslist[2] = np.append(
                    additional_methodslist[2],
                    length(v_t) + additional_methodslist[2][-1])

            for h, name in enumerate(sheet.header):
                if name == 'time':
                    continue
                if h == labelcol:
                    labellist = np.append(labellist, row.dic[name])
                    continue
                other_featurelist[name] = np.append(other_featurelist[name], row.dic[name])
                d_other_featurelist[name] = np.append(
                    d_other_featurelist[name],
                    (row.dic[name] - prev.dic[name]) / timediff)

        # the return value is not used, so this is expected to work in place
        angle_normalization(relative_anglelist)

        savefilename = prefix + str(count + start_index) + '.csv'

        featurelist = [
            speedlist, accelerationlist, accelerationlist_angle, relative_anglelist, anglelist
        ]
        headers = ['speed', 'acceleration', 'acc_angle', 'rel_angle', 'angle']
        for h, name in enumerate(sheet.header):
            if name == 'time' or h == labelcol:
                continue
            featurelist += [other_featurelist[name], d_other_featurelist[name]]
            headers += [name, 'd_' + name]

        # smoothed versions are computed for the features above only
        # (not for time, label and the additional methods)
        avg_featurelist = [
            moving_average(feat, num_points=min(10, len(feat))) for feat in featurelist
        ]
        var_featurelist = [
            moving_variance(feat, num_points=min(10, len(feat))) for feat in featurelist
        ]
        avg_headers = ['moving_avg_' + header for header in headers]
        var_headers = ['moving_var_' + header for header in headers]

        if labelcol is not None:
            headers = ['label'] + headers
            featurelist = [labellist] + featurelist

        headers = ['time'] + headers + list(additional_methods_name[2:]) + avg_headers + var_headers
        featurelist = [timelist] + featurelist + list(
            np.array(additional_methodslist)) + avg_featurelist + var_featurelist

        np.savetxt(os.path.join(featuredir, savefilename),
                   np.array(featurelist).transpose(),
                   delimiter=',',
                   header=io_utils.delimited_list(headers))
        count += 1

    return count
def train(modelrootdir, timesteps, input_dim, nclass, X_normal, X_mutant, max_epoch, batch_size, normal, mutant,
          start_epoch=0, tag='', save_interval=50, alltime=False, X_normal_test=None, X_mutant_test=None,
          test_interval=1, num_node=const.num_node, num_lstmlayer=const.num_layer, use_dropout=True,
          drop_prob=0.5):
    """
    train the two-class (normal vs. mutant) model on randomly drawn balanced batches

    An existing model file is first copied to ``*_old``. When ``start_epoch``
    is positive and a model file exists, training resumes from that model;
    otherwise a new model is built with
    ``lstm.buildAttentionModelMultiViewCNNLSTM``. Loss / accuracy of every
    batch is appended to ``loss_history_<timestamp>.csv`` and, when test data
    is given, the evaluation of every ``test_interval``-th epoch is written to
    ``test_history_<timestamp>.csv``. Whatever happens, the current model is
    saved at the end. If anything went wrong (including an interruption) the
    traceback is printed and the process exits with status 1.

    Parameters
    =======================================
    modelrootdir : str
        root directory for models and history files
    timesteps : int
        the number of time steps of the input (passed to the model builder)
    input_dim : int
        the number of input features (passed to the model builder)
    nclass : int
        the number of classes
    X_normal, X_mutant : array
        training data of each class; must support indexing with an integer array
    max_epoch : int
        training runs for epochs ``start_epoch`` .. ``max_epoch - 1``
    batch_size : int
        each batch holds ``int(batch_size / 2)`` samples of each class
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    start_epoch : int, optional
        the first epoch; a positive value resumes from the saved model
    tag : str, optional
        sub directory / tag of the model
    save_interval : int, optional
        a snapshot of the model is saved every ``save_interval`` epochs
    alltime : bool, optional
        not supported at the moment: no model builder is available for it
    X_normal_test, X_mutant_test : array, optional
        test data; evaluation is skipped unless both are given
    test_interval : int, optional
        the model is evaluated every ``test_interval`` epochs
    num_node : int, optional
        the number of node in a hidden layer
    num_lstmlayer : int, optional
        the number of LSTM layers
    use_dropout : bool, optional
        whether dropout is used
    drop_prob : float, optional
        dropout probability

    Returns
    =======================================
    model :
        the trained model
    """
    # Bound before entering the try block so that the finally clause can
    # inspect them even when an early step fails.
    model = None
    tlf = None
    except_flg = False

    try:
        model_file = lstm.model_path(modelrootdir, tag=tag)

        # save last model as *_old
        if os.path.exists(model_file):
            shutil.copy2(model_file, lstm.model_path(modelrootdir, tag=tag, sufix='_old'))

        if start_epoch > 0 and os.path.exists(model_file):
            print('loading model...')
            model = load_model(model_file)
            model.summary()
        else:
            if alltime:
                # No builder is wired up for this mode; fail clearly here instead
                # of with a NameError further down.
                raise NotImplementedError('no model builder is available for alltime=True')
            model = lstm.buildAttentionModelMultiViewCNNLSTM(timesteps, input_dim,
                                                             use_dropout=use_dropout, drop_prob=drop_prob,
                                                             hidden_unit=num_node, num_lstmlayer=num_lstmlayer)

        os.makedirs(os.path.join(modelrootdir, tag), exist_ok=True)

        # loss history of training
        lossfile_time = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        lossfile = os.path.join(modelrootdir, tag, 'loss_history_' + lossfile_time + '.csv')
        with open(lossfile, 'w') as lf:
            lf.write(io_utils.delimited_list(['time', 'epoch', 'batch', 'loss', 'accuracy']))
            lf.write('\n')

        # accuracy history of test
        if not (X_normal_test is None or X_mutant_test is None):
            test_lossfile = os.path.join(modelrootdir, tag, 'test_history_' + lossfile_time + '.csv')
            tlf = open(test_lossfile, 'w')  # closed in the finally clause
            header = ['time', 'epoch', 'accuracy',
                      normal + '_precision', normal + '_recall', normal + '_f-measure',
                      mutant + '_precision', mutant + '_recall', mutant + '_f-measure',
                      'avg' + '_precision', 'avg' + '_recall', 'avg' + '_f-measure'] \
                + [normal + '_' + str(x) for x in range(len(X_normal_test))] \
                + [mutant + '_' + str(x) for x in range(len(X_mutant_test))]
            tlf.write(io_utils.delimited_list(header))
            tlf.write('\n')

        # a 3-dimensional output means the model emits one prediction per time step
        if model.output.get_shape().ndims == 3:
            Y_normal = io_utils.hotvec_time(nclass, 0, len(X_normal), timesteps)
            Y_mutant = io_utils.hotvec_time(nclass, 1, len(X_mutant), timesteps)
        else:
            Y_normal = io_utils.hotvec(nclass, 0, len(X_normal)).reshape([-1, nclass])
            Y_mutant = io_utils.hotvec(nclass, 1, len(X_mutant)).reshape([-1, nclass])

        print('normal: ' + str(len(X_normal)) + 'samples')
        print('mutant: ' + str(len(X_mutant)) + 'samples')

        loop_per_epoch = (len(X_normal) + len(X_mutant)) / batch_size
        half_batch = int(batch_size / 2)
        starttime = time.time()

        for epoch in range(start_epoch, max_epoch):
            losslist = np.empty([0])
            acclist = np.empty([0])

            for batch in range(int(loop_per_epoch)):
                # draw the same number of samples (with replacement) from both classes
                index_normal = rnd.randint(0, len(X_normal), half_batch)
                index_mutant = rnd.randint(0, len(X_mutant), half_batch)

                X_train = np.concatenate((X_normal[index_normal], X_mutant[index_mutant]))
                Y_train = np.concatenate((Y_normal[index_normal], Y_mutant[index_mutant]))

                ret = model.train_on_batch(X_train, Y_train)
                losslist = np.append(losslist, ret[0])
                acclist = np.append(acclist, ret[1])

                # re-opened for every batch so that the history survives a crash
                with open(lossfile, 'a') as lf:
                    lf.write(io_utils.delimited_list(
                        [str(datetime.datetime.now()), str(epoch), str(batch), str(ret[0]), str(ret[1])]))
                    lf.write('\n')

                sys.stdout.write('\repoch' + str(epoch) + ' ' + io_utils.progressbar(loop_per_epoch, batch))
                sys.stdout.write(' loss:' + ('%0.5f' % ret[0]) + ' acc:' + ('%0.5f' % ret[1]))
                sys.stdout.write(' ' + str(int(time.time() - starttime)) + '[s]')
                sys.stdout.flush()

            sys.stdout.write('\repoch' + str(epoch) + ' ' + io_utils.progressbar(loop_per_epoch, loop_per_epoch))
            sys.stdout.write(' loss:' + ('%0.5f' % np.mean(losslist)) + ' acc:' + ('%0.5f' % np.mean(acclist)))
            sys.stdout.write(' ' + str(int(time.time() - starttime)) + '[s]')
            sys.stdout.write('\n')
            sys.stdout.flush()

            if epoch % save_interval == save_interval - 1:
                model.save(lstm.model_path(modelrootdir, tag=tag, prefix=str(epoch)))

            if epoch % test_interval == test_interval - 1 and tlf is not None:
                accuracy, normal_eval, mutant_eval, avg_eval, est_normal, est_mutant = test.return_alleval(
                    model, X_normal_test, X_mutant_test, batch_size=batch_size, normal=normal, mutant=mutant)
                normal_resultlist = list(map(str, est_normal == 0))
                mutant_resultlist = list(map(str, est_mutant == 1))
                line = [str(datetime.datetime.now()), str(epoch)] \
                    + list(map(str, [accuracy] + normal_eval + mutant_eval + avg_eval)) \
                    + normal_resultlist + mutant_resultlist
                tlf.write(io_utils.delimited_list(line))
                tlf.write('\n')
                print('test accuracy: ' + str(accuracy))

    except BaseException:
        # Deliberately catches everything (also KeyboardInterrupt), exactly like
        # the former bare `except:`, so that an interrupted run still saves the
        # model below before exiting.
        traceback.print_exc()
        except_flg = True

    finally:
        if model is not None:
            model.save(lstm.model_path(modelrootdir, tag=tag))
        if tlf is not None:
            tlf.close()
        if except_flg:
            print('[fail]')
            sys.exit(1)

    return model
def _write_layer_tables(path, header, layerlist, tables):
    """Write one CSV row per node of every layer: layer name, integer node id, values."""
    with open(path, 'w') as f:
        io_utils.writeline(f, io_utils.delimited_list(header))
        for layer, table in zip(layerlist, tables):
            for node in table:
                line = [layer] + list(node)
                line[1] = int(line[1])  # node ids are changed from float to int
                io_utils.writeline(f, io_utils.delimited_list(line))


def aggregate_criteria(
        resultrootdir,
        normal,
        mutant,
        layerlist,
        num_node=const.num_node,
        criteria_list=['var', 'mean', 'histdist']):
    """
    aggregate criteria to one file

    For every layer the columns of all available ``<layer>_<criterion>.csv``
    files are joined. In addition the normal-minus-mutant column of the
    'var' file (column 4) is weighted by how close each histogram distance
    (correlation, intersection, Bhattacharyya) is to an agreement level of
    0.5, 0.6, ..., 0.9. Everything is written to ``aggregated.csv``; the
    intersection-based scores alone are written to ``ranking.csv``.
    The header of the last processed layer is used for both files.

    Parameters
    =======================================
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    num_node : int, optional
        the number of node in a hidden layer
    criteria_list : list of str, optional
        aggregated criteria (the default list is only read, never modified).
        A missing file is reported and skipped, but 'var' and 'histdist'
        must be available for every layer.

    Raises
    =======================================
    ValueError
        if the 'var' or the 'histdist' result of a layer is not available.
        Formerly this ended in a NameError, or silently reused the values of
        the previous layer.
    """
    tag = normal + '_vs_' + mutant
    tagdir = os.path.join(resultrootdir, tag)

    layers = []
    ranking = []
    # defined here as well so that an empty layerlist still yields valid files
    header = ['layer', 'node']
    rankheader = ['layer', 'node']

    for layer in layerlist:
        print(layer)
        onelayer = np.arange(num_node).reshape(-1, 1)
        rankonelayer = np.arange(num_node).reshape(-1, 1)
        header = ['layer', 'node']
        rankheader = ['layer', 'node']

        # reset for every layer: values of another layer must never be reused
        varnmlist = None
        histdistlist = None

        for criterion in criteria_list:
            filename = os.path.join(tagdir, layer + '_' + criterion + '.csv')
            if not os.path.exists(filename):
                print('cannot find ' + filename)
                continue

            array = np.loadtxt(filename, delimiter=' ')
            onelayer = np.concatenate((onelayer, array[:, 1:]), axis=1)

            # the column names are stored in the commented first line of the file
            with open(filename, 'r') as f:
                firstline = f.readline()
            firstline = firstline.replace('# ', '').replace('\n', '').replace('\r', '').split()
            header += firstline[1:]

            if criterion == 'var':
                varnmlist = array[:, 4]
            elif criterion == 'histdist':
                histdistlist = array[:, 1:]

        if varnmlist is None or histdistlist is None:
            raise ValueError("the 'var' and 'histdist' results are required to aggregate layer " + layer)

        for agreement in np.arange(0.5, 1., 0.1):
            label = f"{agreement:.1f}"
            # 0: Correlation -[-1, 1]+
            correlation_score = varnmlist * (1. - np.abs((histdistlist[:, 0] + 1.) / 2. - agreement))
            # 2: Intersect -[0, 1]+
            intersect_score = varnmlist * (1. - np.abs(histdistlist[:, 2] - agreement))
            # 3: Bhattacharyya -[1, 0]+
            bhattacharyya_score = varnmlist * (1. - np.abs(-histdistlist[:, 3] + 1. - agreement))

            onelayer = np.concatenate(
                (onelayer, np.array([correlation_score, intersect_score, bhattacharyya_score]).T), axis=1)
            header += [
                normal + '/' + mutant + '_Correlation_' + label,
                normal + '/' + mutant + '_Intersect_' + label,
                normal + '/' + mutant + '_Bhattacharyya_' + label
            ]

            rankonelayer = np.concatenate((rankonelayer, np.array([intersect_score]).T), axis=1)
            rankheader += [normal + '/' + mutant + '_' + label]

        layers.append(onelayer)
        ranking.append(rankonelayer)

    _write_layer_tables(os.path.join(tagdir, 'aggregated.csv'), header, layerlist, layers)
    _write_layer_tables(os.path.join(tagdir, 'ranking.csv'), rankheader, layerlist, ranking)
def _write_correlation_table(path, header, layerlist, corlist):
    """Write one CSV row per node of every layer: layer name, node index, correlations."""
    with open(path, 'w') as f:
        io_utils.writeline(f, io_utils.delimited_list(header))
        for layer, cor in zip(layerlist, corlist):
            for node, row in enumerate(cor):
                line = [layer, str(node)] + list(row)
                io_utils.writeline(f, io_utils.delimited_list(line))


def correlation(datasetrootdir, resultrootdir, normal, mutant, layerlist, savebinary):
    """
    correlation between activation and existing feature

    The features and the node scores of all trajectories are concatenated
    along axis 1 and the correlation coefficient between every node and every
    feature is computed for the normal worm, for the mutant worm and for both
    together. The results are written to ``correlation_<normal>.csv``,
    ``correlation_<mutant>.csv`` and ``correlation.csv``.
    The feature names of the mutant data set are used as column header.

    Parameters
    =======================================
    datasetrootdir : str
        root directory of dataset
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    """
    tag = normal + '_vs_' + mutant
    tagdir = os.path.join(resultrootdir, tag)

    normalfeaturelist, header = io_utils.get_features(
        os.path.join(datasetrootdir, normal, const.allfeature))
    mutantfeaturelist, header = io_utils.get_features(
        os.path.join(datasetrootdir, mutant, const.allfeature))
    normal_timesteps = [x.shape[1] for x in normalfeaturelist]
    mutant_timesteps = [x.shape[1] for x in mutantfeaturelist]
    normalfeature = np.concatenate(normalfeaturelist, axis=1)
    mutantfeature = np.concatenate(mutantfeaturelist, axis=1)
    concatenated_feature = np.concatenate((normalfeature, mutantfeature), axis=1)

    normalcorlist = []
    mutantcorlist = []
    conccorlist = []
    for layer in layerlist:
        print(layer)
        normalscorelist = io_utils.get_nodescores(
            os.path.join(tagdir, normal), layer, savebinary, normal_timesteps)
        mutantscorelist = io_utils.get_nodescores(
            os.path.join(tagdir, mutant), layer, savebinary, mutant_timesteps)

        # correlation between raw features and activations
        # np.corrcoef stacks the feature rows above the score rows; the block
        # [scores, features] of the resulting matrix is the part of interest.
        normalscore = np.concatenate(normalscorelist, axis=1)
        normalcor = np.corrcoef(
            normalfeature, normalscore)[len(normalfeature):, :len(normalfeature)]

        mutantscore = np.concatenate(mutantscorelist, axis=1)
        mutantcor = np.corrcoef(
            mutantfeature, mutantscore)[len(mutantfeature):, :len(mutantfeature)]

        concatenated_score = np.concatenate((normalscore, mutantscore), axis=1)
        concatenated_correlation = np.corrcoef(
            concatenated_feature, concatenated_score)[
                len(concatenated_feature):, :len(concatenated_feature)]

        normalcorlist.append(normalcor)
        mutantcorlist.append(mutantcor)
        conccorlist.append(concatenated_correlation)

    header = ['layer', 'node'] + header

    _write_correlation_table(
        os.path.join(tagdir, 'correlation_' + normal + '.csv'), header, layerlist, normalcorlist)
    _write_correlation_table(
        os.path.join(tagdir, 'correlation_' + mutant + '.csv'), header, layerlist, mutantcorlist)
    _write_correlation_table(
        os.path.join(tagdir, 'correlation.csv'), header, layerlist, conccorlist)
def _collect_attended(featurelist, masks, feat_idx):
    """Gather the values of one feature at the attended time steps of every trajectory."""
    attended = []
    for features, mask in zip(featurelist, masks):
        series = features[feat_idx]
        # the mask is cut to the length of the feature series
        attended.extend(series[mask[:len(series)]].tolist())
    return attended


def compare_attended(datasetrootdir, resultrootdir, normal, mutant, layerlist, savebinary):
    """
    compare existing features in attended segments

    Only layers whose name starts with "attention" and does not contain
    "last" are processed. A time step counts as attended when the first score
    row of the layer exceeds the threshold half way between ``attn_min`` and
    ``attn_max``. For every feature the values at the attended time steps of
    both classes are turned into normalized 100-bin histograms, which are
    saved to ``attended_feature_hist/<layer>/<index>-<feature>.csv``.
    ``1 - histogram.compareHist(hist1, hist2, 2)`` of every feature is written
    as one line per layer to ``attended_feature_diff.csv``.

    Parameters
    =======================================
    datasetrootdir : str
        root directory of dataset
    resultrootdir : str
        root directory for result
    normal : str
        ID of normal worm
    mutant : str
        ID of mutant worm
    layerlist : list of str
        list of layer
    savebinary : bool
        whether scores are saved by binary
    """
    tag = normal + '_vs_' + mutant
    tagdir = os.path.join(resultrootdir, tag)

    normalfeaturelist, header = io_utils.get_features(
        os.path.join(datasetrootdir, normal, const.allfeature))
    mutantfeaturelist, header = io_utils.get_features(
        os.path.join(datasetrootdir, mutant, const.allfeature))
    normal_timesteps = [x.shape[1] for x in normalfeaturelist]
    mutant_timesteps = [x.shape[1] for x in mutantfeaturelist]

    feature_hist_file = os.path.join(tagdir, "attended_feature_diff.csv")
    with open(feature_hist_file, 'w') as f:
        io_utils.writeline(f, io_utils.delimited_list(['layer'] + header))

        for layer in layerlist:
            if not layer.startswith("attention") or "last" in layer:
                continue

            one_line = [layer]
            hist_dir = os.path.join(tagdir, "attended_feature_hist", layer)
            os.makedirs(hist_dir, exist_ok=True)

            print(layer)
            normalscorelist = io_utils.get_nodescores(
                os.path.join(tagdir, normal), layer, savebinary, normal_timesteps)
            mutantscorelist = io_utils.get_nodescores(
                os.path.join(tagdir, mutant), layer, savebinary, mutant_timesteps)
            normalscore_all = np.concatenate(normalscorelist, axis=1)
            mutantscore_all = np.concatenate(mutantscorelist, axis=1)

            # NOTE(review): attn_max is the smaller of the two class maxima (min,
            # not max) -- kept as it was; confirm that this is intended.
            attn_max = min(np.nanmax(normalscore_all), np.nanmax(mutantscore_all))
            attn_min = min(np.nanmin(normalscore_all), np.nanmin(mutantscore_all))
            attn_th = (attn_max - attn_min) * 0.5 + attn_min
            print("attn_max", attn_max)
            print("attn_min", attn_min)
            print("attn_th", attn_th)

            # The masks depend on the scores only, so they are computed once per
            # layer instead of once per feature.
            normal_masks = [scores[0] > attn_th for scores in normalscorelist]
            mutant_masks = [scores[0] > attn_th for scores in mutantscorelist]

            for feat_idx, feature in enumerate(header):
                print(feature)
                norm_attended_features = _collect_attended(normalfeaturelist, normal_masks, feat_idx)
                mutant_attended_features = _collect_attended(mutantfeaturelist, mutant_masks, feat_idx)

                # common value range so that both histograms share their bins
                feat_max = max(np.max(norm_attended_features), np.max(mutant_attended_features))
                feat_min = min(np.min(norm_attended_features), np.min(mutant_attended_features))

                hist1 = histogram.calcHist(norm_attended_features, 100, [feat_min, feat_max])
                hist1 = hist1 / np.sum(hist1)
                hist2 = histogram.calcHist(mutant_attended_features, 100, [feat_min, feat_max])
                hist2 = hist2 / np.sum(hist2)
                bins = np.linspace(feat_min, feat_max, 101)

                inverse_overlap = 1.0 - histogram.compareHist(hist1, hist2, 2)
                one_line.append(str(inverse_overlap))
                print("inverse_overlap", inverse_overlap)

                # replace characters that are not suitable for a file name
                feature_fname = re.sub(r'[\\|/|:|?|.|"|<|>|\|]', '-', feature)
                np.savetxt(os.path.join(hist_dir, str(feat_idx) + '-' + feature_fname + '.csv'),
                           np.vstack((bins[:-1], bins[1:], hist1, hist2)).T,
                           header='start,end,' + normal + ',' + mutant,
                           delimiter=',')

            io_utils.writeline(f, io_utils.delimited_list(one_line))
def test(model, X_normal, X_mutant, batch_size, normal, mutant, F_normal_test=None, F_mutant_test=None, savedir=''):
    """
    Evaluate model

    The class of every sample is estimated as the argmax of the prediction
    (0 is regarded as normal, 1 as mutant). Accuracy, precision, recall and
    f-measure are printed and, if ``savedir`` is given, stored in
    ``result.txt``. When the file names of the test data are given as well,
    ``failure.txt`` lists for every file whether it was classified correctly.

    Parameters
    =======================================
    model : keras.models
        The model predict test data.
    X_normal : 3-dimension array [numdata, time, numfeature]
        Dimension of input data.
    X_mutant : 3-dimension array [numdata, time, numfeature]
        Dimension of input data.
    batch_size : int
        The batch size for testing.
    normal : str
        ID of normal worm (used as label in the output).
    mutant : str
        ID of mutant worm (used as label in the output).
    F_normal_test : list of str, optional
        filenames of X_normal.
    F_mutant_test : list of str, optional
        filenames of X_mutant.
    savedir : str, optional
        The directory for saveing result. If this parameter is '', the result is not stored.

    Returns
    =======================================
    est_normal : array
        estimated class of every sample of X_normal
    est_mutant : array
        estimated class of every sample of X_mutant
    """
    pd_normal = predict(model, X_normal, batch_size)
    pd_mutant = predict(model, X_mutant, batch_size)
    est_normal = np.argmax(pd_normal, axis=1)
    est_mutant = np.argmax(pd_mutant, axis=1)

    (accuracy,
     precision_normal, recall_normal, fmeasure_normal,
     precision_mutant, recall_mutant, fmeasure_mutant) = evaluate(est_normal, est_mutant)
    avg_precision = np.mean((precision_normal, precision_mutant))
    avg_recall = np.mean((recall_normal, recall_mutant))
    avg_fmeasure = np.mean((fmeasure_normal, fmeasure_mutant))

    print('accuracy : ' + str(accuracy))
    print('precision (' + normal + ') : ' + str(precision_normal) +
          ', precision (' + mutant + ') : ' + str(precision_mutant))
    print('recall (' + normal + ') : ' + str(recall_normal) +
          ', recall (' + mutant + ') : ' + str(recall_mutant))
    print('f-measure (' + normal + ') : ' + str(fmeasure_normal) +
          ', f-measure (' + mutant + ') : ' + str(fmeasure_mutant))
    print('Avg precision : ' + str(avg_precision))
    print('Avg recall : ' + str(avg_recall))
    print('Avg f-measure : ' + str(avg_fmeasure))

    if savedir != '':
        os.makedirs(savedir, exist_ok=True)

        # '\n' instead of os.linesep: the files are opened in text mode, which
        # already translates '\n' to the line separator of the platform.
        with open(os.path.join(savedir, 'result.txt'), 'w') as result_file:
            result_file.write('accuracy : ' + str(accuracy) + '\n')
            result_file.write('\t precision \t recall \t f-measure' + '\n')
            result_file.write(normal + ' \t ' + str(precision_normal) + ' \t ' + str(recall_normal) +
                              ' \t ' + str(fmeasure_normal) + '\n')
            result_file.write(mutant + ' \t ' + str(precision_mutant) + ' \t ' + str(recall_mutant) +
                              ' \t ' + str(fmeasure_mutant) + '\n')
            result_file.write('Avg \t ' + str(avg_precision) + ' \t ' + str(avg_recall) +
                              ' \t ' + str(avg_fmeasure))

        if F_normal_test is not None and F_mutant_test is not None:
            # [filename, 'True' / 'False'] : whether the file was classified correctly
            n_file_result_list = [[F_normal_test[i], str(correct)]
                                  for i, correct in enumerate(est_normal == 0)]
            m_file_result_list = [[F_mutant_test[i], str(correct)]
                                  for i, correct in enumerate(est_mutant == 1)]

            by_filename = io_utils.cmp_to_key(
                lambda a, b: io_utils.compare_filename(a[0], b[0]))
            n_file_result_list.sort(key=by_filename)
            m_file_result_list.sort(key=by_filename)

            with open(os.path.join(savedir, 'failure.txt'), 'w') as failure_file:
                for sl in n_file_result_list + m_file_result_list:
                    failure_file.write(io_utils.delimited_list(sl))
                    failure_file.write('\n')

    return est_normal, est_mutant