def preprocess_record(record_name):
    (X_train, y_train, recordLength, arousals_originale) = loaddata(record_name)
    # Ignore records that do not contain any arousals
    if 1 not in arousals_originale:
        L.log_info('Record discarded: no arousals found in %s\n' % record_name)
        return
    model = model_eval(X_train, y_train, p_INPUT_FEAT, p_OUTPUT_CLASS,
                       record_name, recordLength)
    return model
def init():
    # Create the 'models' subdirectory and delete any existing model files
    try:
        os.mkdir('models')
    except OSError:
        pass
    # Create the 'tensorboard' subdirectory
    try:
        os.mkdir('tensorboard')
    except OSError:
        pass
    for f in glob.glob('models/LSTM_*.hdf5'):
        os.remove(f)
    for f in glob.glob('tensorboard/*'):
        shutil.rmtree(f, ignore_errors=True)

    stringInit = ""
    stringInit += "\r\n*************************** init ***********************************"
    stringInit += "\r\nFs (signal sampling frequency): " + str(Fs)
    stringInit += ("\r\np_WINDOW_SIZE = x*Fs, length of the time-series window "
                   "passed to the model: " + str(p_WINDOW_SIZE))
    stringInit += "\r\np_INPUT_FEAT = 13, number of input signals: " + str(p_INPUT_FEAT)
    stringInit += ("\r\np_OUTPUT_CLASS = 3  # 1, 0, -1 (total of 3), number of classes "
                   "in the target arousal trace y: " + str(p_OUTPUT_CLASS))
    stringInit += ("\r\np_BATCH_SIZE = 1000, number of samples passed to the model "
                   "at a time: " + str(p_BATCH_SIZE))
    stringInit += ("\r\np_EPOCHS = 75, number of times the same record is passed "
                   "through the model again: " + str(p_EPOCHS))
    stringInit += "\r\np_MODEL_FILE, file where the model weights are saved: " + str(p_MODEL_FILE)
    stringInit += "\r\np_DATASET_DIR, dataset directory: " + str(phyc.p_DATASET_DIR)
    stringInit += "\r\np_LOG_FILE, text log in CSV format: " + str(p_LOG_FILE)
    stringInit += "\r\np_KERAS_LOG_FILE, Keras log in CSV format: " + str(p_KERAS_LOG_FILE)
    stringInit += "\r\np_TENSORBOARD_LOGDIR, TensorBoard log directory: " + str(p_TENSORBOARD_LOGDIR)
    stringInit += "\r\n********************************************************************"
    L.log_info(stringInit)
def loaddata(record_name):
    L.log_info("Loading record: " + str(record_name))
    header_file = record_name + '.hea'
    signal_file = record_name + '.mat'
    arousal_file = record_name + '-arousal.mat'

    # Get the signal names from the header file
    signal_names, Fs, n_samples = phyc.import_signal_names(header_file)
    signal_names = list(np.append(signal_names, 'arousals'))
    print("signal_names: " + str(signal_names))
    print("Fs: " + str(Fs))
    print("n_samples: " + str(n_samples))

    # Convert this subject's data into a pandas dataframe
    this_data = phyc.get_subject_data(arousal_file, signal_file, signal_names)

    # ----------------------------------------------------------------------
    # Generate the features for the classification model
    # ----------------------------------------------------------------------
    arousals_originale = this_data.get(['arousals']).values
    # Note: despite its name, this variable holds the ECG channel; here it is
    # only used to determine the record length.
    SaO2 = this_data.get(['ECG']).values
    recordLength = SaO2.size
    signals = None
    arousals = None

    if p_INPUT_FEAT == 1:
        SaO2 = instfreq(SaO2)
        SaO2 = scale(SaO2)
        #SaO2 = standardize(SaO2)
        signals, arousals = phyc.signalsToMatrix(SaO2, arousals_originale,
                                                 recordLength, p_WINDOW_SIZE,
                                                 p_INPUT_FEAT)
        print("signals shape:" + str(signals.shape))

    if p_INPUT_FEAT == 13:
        ECG = this_data.get(['ECG']).values
        print("ECG shape:" + str(ECG.shape))
        ECG = instfreq(ECG)
        print("ECG shape:" + str(ECG.shape))
        signals = this_data[['F3-M2', 'F4-M1', 'C3-M2', 'C4-M1', 'O1-M2',
                             'O2-M1', 'E1-M2', 'Chin1-Chin2', 'ABD', 'CHEST',
                             'AIRFLOW', 'SaO2']].values
        signals = np.append(signals, ECG, axis=1)
        #signals = instfreq(signals)
        signals = scale(signals)
        #signals = standardize(signals)
        print("signals shape:" + str(signals.shape))
        print("signals head:" + str(signals[0:20, :]))
        signals, arousals = phyc.signalsToMatrix(signals, arousals_originale,
                                                 recordLength, p_WINDOW_SIZE,
                                                 p_INPUT_FEAT)
    #signals = signals.values
    return signals, arousals, recordLength, arousals_originale
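# The helpers scale() and instfreq() used above are defined elsewhere in this
# repo. For reference, a minimal sketch of what they plausibly do (an
# assumption, not the repo's actual implementation): scale() maps each channel
# into [-1, 1], and instfreq() derives an instantaneous-frequency trace from
# the ECG via the Hilbert transform.
#
# import numpy as np
# from scipy.signal import hilbert
#
# def scale_sketch(x):
#     # Min-max scale each column into [-1, 1]; guard against flat channels.
#     xmin = x.min(axis=0)
#     xmax = x.max(axis=0)
#     span = np.where(xmax - xmin == 0, 1.0, xmax - xmin)
#     return 2.0 * (x - xmin) / span - 1.0
#
# def instfreq_sketch(x, fs=200.0):
#     # Instantaneous frequency of a single-column signal from the phase of
#     # its analytic (Hilbert) representation, padded to the original length.
#     phase = np.unwrap(np.angle(hilbert(x, axis=0)), axis=0)
#     f = np.diff(phase, axis=0) * fs / (2.0 * np.pi)
#     return np.vstack([f, f[-1:]])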
def score_training_set(model):
    try:
        os.mkdir('training_output')
    except OSError:
        pass
    # Generate a data frame that points to the challenge files
    tr_files, te_files = phyc.get_files()
    score = Challenge2018Score()
    for i in range(0, np.size(tr_files, 0)):
        gc.collect()
        sys.stdout.write('\nEvaluating training subject: %d/%d'
                         % (i + 1, np.size(tr_files, 0)))
        sys.stdout.flush()
        record_name = tr_files.header.values[i][:-4]
        predictions, pred_arousal_probabilities, model = T.classify_record(
            record_name, model)
        arousals = phyc.import_arousals(tr_files.arousal.values[i])
        # Flatten to a 1-D array
        arousals = np.ravel(arousals)
        print("arousals.shape: " + str(arousals.shape))
        print("predictions.shape: " + str(predictions.shape))
        print("pred_arousal_probabilities.shape: "
              + str(pred_arousal_probabilities.shape))
        print_arousal_predictions(arousals, pred_arousal_probabilities)
        score.score_record(arousals, pred_arousal_probabilities, record_name)
        auroc = score.record_auroc(record_name)
        auprc = score.record_auprc(record_name)
        L.log_info(' AUROC:%f AUPRC:%f' % (auroc, auprc))
        # Also save the training predictions, to evaluate voting schemes
        # between different models
        output_file = "training_output/" + os.path.basename(record_name) + '.vec'
        L.log_info("Saving training output files to " + str(output_file))
        np.savetxt(output_file, pred_arousal_probabilities, fmt='%.3f')
        print()

    auroc_g = score.gross_auroc()
    auprc_g = score.gross_auprc()
    L.log_info('Training AUROC Performance (gross): %f' % auroc_g)
    L.log_info('Training AUPRC Performance (gross): %f' % auprc_g)
    L.log_info("\n\r ")
    return model
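# Challenge2018Score comes from the official PhysioNet/CinC 2018 scoring code.
# As a sanity check (an assumption, not part of this repo), per-record AUROC
# and AUPRC can be cross-checked with scikit-learn, masking the unscored
# samples (arousal label -1):
#
# from sklearn.metrics import roc_auc_score, average_precision_score
#
# def sklearn_check(arousals, probabilities):
#     mask = arousals != -1  # -1 marks regions excluded from scoring
#     y_true = arousals[mask]
#     y_prob = probabilities[mask]
#     return (roc_auc_score(y_true, y_prob),
#             average_precision_score(y_true, y_prob))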
def loaddata(record_name):
    L.log_info("Loading record: " + str(record_name))
    header_file = record_name + '.hea'
    signal_file = record_name + '.mat'
    arousal_file = record_name + '-arousal.mat'

    # Get the signal names from the header file
    signal_names, Fs, n_samples = phyc.import_signal_names(header_file)
    signal_names = list(np.append(signal_names, 'arousals'))
    this_data = phyc.get_subject_data(arousal_file, signal_file, signal_names)

    SaO2 = this_data.get(['SaO2']).values
    arousals = this_data.get(['arousals']).values
    recordLength = SaO2.size
    #print(this_data)
    #print(this_data.values)
    signals = this_data[['F3-M2', 'F4-M1', 'C3-M2', 'C4-M1', 'O1-M2', 'O2-M1',
                         'E1-M2', 'Chin1-Chin2', 'ABD', 'CHEST', 'AIRFLOW',
                         'SaO2', 'ECG']]
    signals = signals.values
    arousals = arousals.astype(np.int32)
    return signals, arousals, recordLength, SaO2
def evaluate_test_set():
    # Generate a data frame that points to the challenge files
    tr_files, te_files = phyc.get_files()
    try:
        os.mkdir('test_output')
    except OSError:
        pass
    for f in glob.glob('models/*.vec'):
        os.remove(f)

    # classify_record() returns the model so it can be reused across records;
    # start with None so the first call builds/loads it.
    model = None
    for i in range(0, np.size(te_files, 0)):
        gc.collect()
        L.log_info('Evaluating test subject: %d/%d' % (i + 1, np.size(te_files, 0)))
        record_name = te_files.header.values[i][:-4]
        output_file = "test_output/" + os.path.basename(record_name) + '.vec'
        L.log_info("Saving test output files to " + str(output_file))
        predictions, pred_arousal_probabilities, model = T.classify_record(
            record_name, model)
        #np.savetxt(output_file, predictions[:, 1], fmt='%.3f')
        np.savetxt(output_file, pred_arousal_probabilities, fmt='%.3f')
def model_eval(X, y, p_INPUT_FEAT, p_OUTPUT_CLASS, record_name, record_length):
    batch = p_BATCH_SIZE
    epochs = p_EPOCHS  # reduced from 120
    rep = 1  # the K-fold procedure can be repeated multiple times
    Kfold = 3  # ENRICO: reduced from 5
    #Ntrain = int(record_length/9000)  # 8528, number of recordings in the training set
    Ntrain = int(record_length / p_WINDOW_SIZE)
    Nsamp = int(Ntrain / Kfold)  # number of recordings to take as validation
    X_train = X
    print("model eval x shape:" + str(X.shape))
    # Need to add a dimension for training
    #X = np.expand_dims(X, axis=2)
    classes = ['1', '0', '-1']
    Nclass = len(classes)
    print("Nclass" + str(Nclass))

    # Try adding class weights to counterbalance the scarcity of 1s in the records
    print("y " + str(y))
    y_ints = [yi.argmax() for yi in y]
    class_weights = class_weight.compute_class_weight('balanced',
                                                      np.unique(y_ints), y_ints)
    #class_weights = 'auto'
    L.log_info("Class weights" + str(class_weights))

    # ENRICO: the confusion matrix compares true vs. predicted values
    cvconfusion = np.zeros((Nclass, Nclass, Kfold * rep))
    cvscores = []
    counter = 0
    # repetitions of cross-validation
    for r in range(rep):
        print("Rep %d" % (r + 1))
        # cross-validation loop
        for k in range(Kfold):
            print("Cross-validation run %d" % (k + 1))
            # Callback definitions
            callbacks = [
                # Early stopping definition
                EarlyStopping(monitor='val_loss', mode='min', patience=5, verbose=1),
                # Decay the learning rate by a factor of 0.01
                AdvancedLearnignRateScheduler(monitor='val_loss', patience=1,
                                              verbose=1, mode='min',
                                              decayRatio=0.01),
                # Save the best model
                ModelCheckpoint('models/' + p_MODEL_FILE + '_k{}_r{}.hdf5'.format(k, r),
                                monitor='val_loss', mode='min',
                                save_best_only=True, verbose=1),
                ModelCheckpoint('models/' + p_MODEL_FILE + '_best.hdf5',
                                monitor='val_loss', mode='min',
                                save_best_only=True, verbose=1),
                CSVLogger('logs/' + p_KERAS_LOG_FILE, separator=',', append=False),
                TensorBoard(log_dir='tensorboard/' + str(p_TENSORBOARD_LOGDIR),
                            histogram_freq=0, batch_size=32, write_graph=True,
                            write_grads=False, write_images=False,
                            embeddings_freq=0, embeddings_layer_names=None,
                            embeddings_metadata=None, embeddings_data=None),
            ]
            #print("loading model with window_size: " + str(WINDOW_SIZE))
            # Load model
            model = ResNet_model()
            try:
                model.load_weights('models/' + p_MODEL_FILE + '_k{}_r{}.hdf5'.format(k, r))
            except Exception:
                print("Weights not found; starting from scratch")
            # Split train and validation sets
            idxval = np.random.choice(Ntrain, Nsamp, replace=False)
            idxtrain = np.invert(np.in1d(range(X_train.shape[0]), idxval))
            ytrain = y[np.asarray(idxtrain), :]
            Xtrain = X[np.asarray(idxtrain), :, :]
            Xval = X[np.asarray(idxval), :, :]
            yval = y[np.asarray(idxval), :]
            # Train the model. Note: fitting on the full X means the validation
            # subset overlaps the training data (the split above is unused).
            #model.fit(Xtrain, ytrain,
            model.fit(X, y,
                      validation_data=(Xval, yval),
                      epochs=epochs, batch_size=batch,
                      class_weight=class_weights,
                      callbacks=callbacks)
            # Evaluate best trained model
            """model.load_weights('models/'+p_MODEL_FILE+'_k{}_r{}.hdf5'.format(k,r))
            # enrico added, https://github.com/matterport/Mask_RCNN/issues/588
            model._make_predict_function()
            #ypred = model.predict(Xval)
            ypred = model.predict(X)
            yval = y
            #print("yval SHAPE" + str(yval.shape))
            #print("ypred SHAPE" + str(ypred.shape))
            # compare the prediction for column 0 (the one with value 1)
            arousals = yval[:, 0]
            predictions = ypred[:, 0]
            """
            # Reset the TensorFlow session to free GPU memory between folds
            K.clear_session()
            gc.collect()
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            sess = tf.Session(config=config)
            K.set_session(sess)
            counter += 1

    # Save cross-validation results
    #scipy.io.savemat('xval_results.mat', mdict={'cvconfusion': cvconfusion.tolist()})
    return model  #, arousals, predictions
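# A minimal, self-contained sketch of the split used above (assuming X has one
# window per row): Nsamp row indices are drawn without replacement for
# validation, and the boolean complement selects the training rows.
#
# import numpy as np
#
# def random_split(n_rows, n_val, seed=None):
#     rng = np.random.RandomState(seed)
#     idxval = rng.choice(n_rows, n_val, replace=False)
#     idxtrain = np.invert(np.in1d(np.arange(n_rows), idxval))
#     return idxtrain, idxval
#
# # Example: 10 windows, 3 held out for validation.
# tr_mask, val_idx = random_split(10, 3, seed=0)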
def preprocess_record(record_name, model):
    signals, arousals, recordLength, SaO2 = loaddata(record_name)
    # Ignore records that do not contain any arousals
    if 1 not in arousals:
        L.log_info('no arousals found in %s\n' % record_name)
        return

    # We select a window size of 60 seconds with no overlap to compute
    # the features
    step = p_WINDOW_SIZE
    window_size = p_WINDOW_SIZE
    n_samples = recordLength

    # Initialize the matrices that store our training data: one row per
    # window, with the SaO2 variance as the single feature and the window's
    # maximum arousal label as the target.
    X_subj = np.zeros([n_samples // step, 1])
    Y_subj = np.zeros([n_samples // step, 1])
    for idx, k in enumerate(range(0, (n_samples - step + 1), step)):
        X_subj[idx, 0] = np.var(np.transpose(SaO2[k:k + window_size]), axis=1)
        Y_subj[idx] = np.max(arousals[k:k + window_size])

    # Scale the input signals into the range -1 to 1
    #signals = scale(signals)
    #x = signals.reshape(1, recordLength, 13)
    #x = strided_axis0_backward(signals, p_WINDOW_SIZE)
    """y = arousals
    print("x shape: " + str(x.shape))
    print("y shape: " + str(y.shape))
    # Convert y2 to dummy variables
    y2 = np.zeros((y.shape[0], p_OUTPUT_CLASS), dtype=np.float32)
    index = 0
    for yi in np.nditer(y):
        if yi == 0:
            y2[index, 0] = 1  # set to zero
        if yi == 1:
            y2[index, 1] = 1
        if yi == -1:
            y2[index, 2] = 1  # set to minus one
        index = index + 1
    #y2[np.arange(y.shape[0]), y] = 1.0
    print("y2 shape: " + str(y2.shape))
    #print("x: " + str(x))
    #print("y: " + str(y))
    #print("y2: " + str(y2))
    y2sum = y2.sum(axis=0)
    print("y2 sum: " + str(y2sum))
    """

    callbacks = [
        # Early stopping definition
        EarlyStopping(monitor='val_loss', patience=3, verbose=1),
        # Decay the learning rate by a factor of 0.1
        AdvancedLearnignRateScheduler(monitor='val_loss', patience=3, verbose=1,
                                      mode='auto', decayRatio=0.1),
        # Save the best model
        ModelCheckpoint('models/' + str(os.path.basename(record_name)) + str(p_MODEL_FILE),
                        monitor='val_loss', save_best_only=True, verbose=1),
        CSVLogger('logs/' + p_KERAS_LOG_FILE, separator=',', append=False),
        TensorBoard(log_dir='tensorboard/' + str(p_TENSORBOARD_LOGDIR),
                    histogram_freq=0, batch_size=32, write_graph=True,
                    write_grads=False, write_images=False, embeddings_freq=0,
                    embeddings_layer_names=None, embeddings_metadata=None,
                    embeddings_data=None)
    ]

    if model is None:
        model = LogisticRegressionKeras()

    # Split train and validation sets
    """idxval_start = np.random.randint(np.trunc(recordLength / 2), size=1)[0]
    idxval_size = np.random.randint(np.trunc(recordLength / 2), size=1)[0]
    #print(idxval_start)
    #print(idxval_size)
    Xval = x[idxval_start:idxval_start + idxval_size, :, :]
    Yval = y2[idxval_start:idxval_start + idxval_size, :]
    """
    try:
        # Use the same basename-based path the checkpoints are saved under
        model.load_weights('models/' + str(os.path.basename(record_name)) + str(p_MODEL_FILE))
    except Exception as e:
        print("Weights not found (file " + str(p_MODEL_FILE) + "); starting from scratch")
        print(str(e))

    print('Train...')
    # Note: validation here reuses the training data.
    model.fit(X_subj, np.ravel(Y_subj),
              validation_data=(X_subj, np.ravel(Y_subj)),
              epochs=p_EPOCHS, batch_size=p_BATCH_SIZE,
              callbacks=callbacks)  # 200
    #model.fit(x, y2, validation_data=(Xval, Yval), epochs=p_EPOCHS,
    #          batch_size=p_BATCH_SIZE, callbacks=callbacks)  # 200
    """pred = model.predict(x)
    predict_classes = np.argmax(pred, axis=1)
    predict_classes[predict_classes == 2] = -1
    #print("Predicted classes: {}", predict_classes)
    #print("Expected classes: {}", y)
    print("Predicted sum: {}", predict_classes.sum(axis=0))
    print("Expected sum: {}", y.sum(axis=0))
    """
    try:
        model.save_weights('models/' + str(os.path.basename(record_name)) + str(p_MODEL_FILE))
    except Exception:
        L.log_info("Unable to save the weights")
    return model
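# The per-window loop above can be vectorized; a minimal sketch (assuming the
# record is trimmed to a whole number of windows, as the integer division
# above already implies):
#
# import numpy as np
#
# def window_features(sao2, labels, window):
#     n_win = sao2.size // window
#     w_sig = sao2.ravel()[:n_win * window].reshape(n_win, window)
#     w_lab = labels.ravel()[:n_win * window].reshape(n_win, window)
#     X = w_sig.var(axis=1, keepdims=True)  # one variance feature per window
#     Y = w_lab.max(axis=1, keepdims=True)  # positive if any sample is arousal
#     return X, Y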
def preprocess_record(record_name, model):
    signals, arousals, recordLength = loaddata(record_name)
    # Ignore records that do not contain any arousals
    if 1 not in arousals:
        L.log_info('no arousals found in %s\n' % record_name)
        return

    # Scale the input signals into the range -1 to 1
    signals = scale(signals)
    #x = signals.reshape(1, recordLength, 13)
    x = strided_axis0_backward(signals, p_WINDOW_SIZE)
    y = arousals
    # Use partial overlapping between windows to reduce training time
    # (with the step equal to the window size this in fact yields
    # non-overlapping windows)
    overlapping = p_WINDOW_SIZE
    x = x[1::overlapping, :, :]
    y = y[1::overlapping, :]
    print("x shape: " + str(x.shape))
    print("y shape: " + str(y.shape))

    # Convert y to dummy (one-hot) variables
    y2 = np.zeros((y.shape[0], p_OUTPUT_CLASS), dtype=np.float32)
    index = 0
    for yi in np.nditer(y):
        if yi == 0:
            y2[index, 0] = 1  # set to zero
        if yi == 1:
            y2[index, 1] = 1
        if yi == -1:
            y2[index, 2] = 1  # set to minus one
        index = index + 1
    #y2[np.arange(y.shape[0]), y] = 1.0
    print("y2 shape: " + str(y2.shape))
    #print("x: " + str(x))
    #print("y: " + str(y))
    #print("y2: " + str(y2))
    #y2 = y2.reshape(11925, 1, 3)
    y2sum = y2.sum(axis=0)
    print("y2 sum: " + str(y2sum))

    # Try adding class weights to counterbalance the scarcity of 1s in the records
    unique, counts = np.unique(arousals, return_counts=True)
    print("arousal value counts:" + str(dict(zip(unique, counts))))
    categorical_labels = to_categorical(arousals, num_classes=None)
    y = categorical_labels
    print("y " + str(y))
    y_ints = [yi.argmax() for yi in y2]
    unique, counts = np.unique(y_ints, return_counts=True)
    print("y_ints value counts:" + str(dict(zip(unique, counts))))
    class_weights = class_weight.compute_class_weight('balanced',
                                                      np.unique(y_ints), y_ints)
    #class_weights = 'auto'
    L.log_info("Class weights" + str(class_weights))

    callbacks = [
        # Early stopping definition
        EarlyStopping(monitor='loss', patience=3, verbose=1),
        # Decay the learning rate by a factor of 0.1
        #AdvancedLearnignRateScheduler(monitor='loss', patience=1, verbose=1, mode='auto', decayRatio=0.1),
        # Save the best model
        ModelCheckpoint('models/' + str(p_MODEL_FILE), monitor='loss',
                        save_best_only=True, verbose=1),
        CSVLogger('logs/' + p_KERAS_LOG_FILE, separator=',', append=False),
        TensorBoard(log_dir='tensorboard/' + str(p_TENSORBOARD_LOGDIR),
                    histogram_freq=0, batch_size=32, write_graph=True,
                    write_grads=False, write_images=False, embeddings_freq=0,
                    embeddings_layer_names=None, embeddings_metadata=None,
                    embeddings_data=None)
    ]

    if model is None:
        #model = LSTM_model()
        model = Bi_LSTM_model()

    # Split train and validation sets
    idxval_start = np.random.randint(np.trunc(recordLength / 2), size=1)[0]
    idxval_size = np.random.randint(np.trunc(recordLength / 2), size=1)[0]
    #print(idxval_start)
    #print(idxval_size)
    Xval = x[idxval_start:idxval_start + idxval_size, :, :]
    Yval = y2[idxval_start:idxval_start + idxval_size, :]

    print('Train...')
    model.fit(x, y2, validation_data=(Xval, Yval), epochs=p_EPOCHS,
              class_weight=class_weights, batch_size=p_BATCH_SIZE,
              callbacks=callbacks)  # 200

    pred = model.predict(x)
    predict_classes = np.argmax(pred, axis=1)
    # Map the one-hot index for class -1 (column 2) back to the label -1
    predict_classes[predict_classes == 2] = -1
    #print("Predicted classes: {}", predict_classes)
    #print("Expected classes: {}", y)
    print("Predicted sum: {}", predict_classes.sum(axis=0))
    print("Expected sum: {}", y.sum(axis=0))

    try:
        model.save_weights('models/' + str(p_MODEL_FILE))
    except Exception:
        L.log_info("Unable to save the weights")
    return model
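# A vectorized equivalent of the one-hot loop above (a sketch; the column
# order matches the loop: 0 -> column 0, 1 -> column 1, -1 -> column 2):
#
# import numpy as np
#
# def to_one_hot(labels, n_classes=3):
#     col = np.select([labels == 0, labels == 1, labels == -1], [0, 1, 2])
#     out = np.zeros((labels.size, n_classes), dtype=np.float32)
#     out[np.arange(labels.size), col.ravel()] = 1.0
#     return out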
    # Part of the packaging routine; 'myzip' is opened earlier in this file.
    for dirName, subdirList, fileList in os.walk('.'):
        for fname in fileList:
            if ('.vec' in fname[-4:] or '.py' in fname[-3:]
                    or '.pkl' in fname[-4:] or '.txt' in fname[-4:]
                    or '.log' in fname[-4:] or '.hdf5' in fname[-5:]):
                myzip.write(os.path.join(dirName, fname))


# -----------------------------------------------------------------------------
# MAIN FUNCTION
# -----------------------------------------------------------------------------
if __name__ == '__main__':
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    L.init_logger(T.p_LOG_FILE)
    dtInizioElaborazione = datetime.datetime.now()
    L.log_info("Execution starts at: " + str(dtInizioElaborazione))
    try:
        L.log_info("############################## TRAIN ##############################")
        train()
        L.log_info("###################### SCORE TRAINING SET #########################")
        score_training_set()
        L.log_info("###################### EVALUATE TEST SET #########################")
        evaluate_test_set()
        L.log_info("####################### PACKAGE ENTRY #############################"