Example #1
def preprocess_record(record_name):
    (X_train, y_train, recordLength, arousals_originale) = loaddata(record_name)

    # Ignore records that do not contain any arousals
    if 1 not in arousals_originale:
        L.log_info('Record discarded: no arousals found in %s\n' % record_name)
        return

    model = model_eval(X_train, y_train, p_INPUT_FEAT, p_OUTPUT_CLASS, record_name, recordLength)
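
# A hypothetical driver loop (not part of the original snippet) showing how
# preprocess_record might be applied to every training record; phyc.get_files()
# and the '.hea'-stripping convention are borrowed from score_training_set
# further down in this listing.
def preprocess_all_records():
    tr_files, te_files = phyc.get_files()
    for i in range(np.size(tr_files, 0)):
        record_name = tr_files.header.values[i][:-4]  # drop the '.hea' extension
        preprocess_record(record_name)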
def init():

    # Create the 'models' subdirectory and delete any existing model files
    try:
        os.mkdir('models')
    except OSError:
        pass
    # Create the 'tensorboard' subdirectory
    try:
        os.mkdir('tensorboard')
    except OSError:
        pass

    for f in glob.glob('models/LSTM_*.hdf5'):
        os.remove(f)

    for f in glob.glob('tensorboard/*'):
        shutil.rmtree(f, ignore_errors=True)

    stringInit = ""
    stringInit += str(
        "\r\n*************************** init ***********************************"
    )
    stringInit += str("\r\nFs (frequenza di campionamento segnali): " +
                      str(Fs))
    stringInit += str(
        "\r\np_WINDOW_SIZE=x*Fs   dimensione della serie storica passata al modello: "
        + str(p_WINDOW_SIZE))
    stringInit += str("\r\np_INPUT_FEAT=13      numero di segnali in input: " +
                      str(p_INPUT_FEAT))
    stringInit += str(
        "\r\np_OUTPUT_CLASS=3     # 1,0,-1 (total of 3) - numero di classi contenute nel tracciato di target y (arousals):  "
        + str(p_OUTPUT_CLASS))
    stringInit += str(
        "\r\np_BATCH_SIZE=1000    numero di campioni per volta passati al modello "
        + str(p_BATCH_SIZE))
    stringInit += str(
        "\r\np_EPOCHS=75          epoche, numero di volte che lo stesso report viene fatto ripassare: "
        + str(p_EPOCHS))
    stringInit += str(
        "\r\np_MODEL_FILE - file dove salvo i pesi del modello:" +
        str(p_MODEL_FILE))
    stringInit += str("\r\np_DATASET_DIR - directory del dataset:" +
                      str(phyc.p_DATASET_DIR))
    stringInit += str("\r\np_LOG_FILE - log testuale in CSV:" +
                      str(p_LOG_FILE))
    stringInit += str("\r\np_KERAS_LOG_FILE - log testuale in CSV:" +
                      str(p_KERAS_LOG_FILE))
    stringInit += str(
        "\r\np_TENSORBOARD_LOGDIR - directory di log per Tensorboard:" +
        str(p_TENSORBOARD_LOGDIR))
    stringInit += str(
        "\r\n********************************************************************"
    )
    L.log_info(stringInit)
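
# Minimal sketch (assumed, not shown in this listing) of the logger module `L`
# used throughout: init_logger opens the text log, log_info echoes the message
# to stdout and appends it to the log file.
import logging

def init_logger(log_file):
    logging.basicConfig(filename=log_file, level=logging.INFO, format='%(message)s')

def log_info(message):
    print(message)
    logging.getLogger().info(message)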
Example #3
def loaddata(record_name):
    L.log_info("Loading record: " + str(record_name))
    header_file = record_name + '.hea'
    signal_file = record_name + '.mat'
    arousal_file = record_name + '-arousal.mat'

    # Get the signal names from the header file
    signal_names, Fs, n_samples = phyc.import_signal_names(header_file)
    signal_names = list(np.append(signal_names, 'arousals'))
    print("signal_names: " + str(signal_names))
    print("Fs: " + str(Fs))
    print("n_samples: " + str(n_samples))
    # Convert this subject's data into a pandas dataframe
    this_data = phyc.get_subject_data(arousal_file, signal_file, signal_names)

    # ----------------------------------------------------------------------
    # Generate the features for the classification model - variance of SaO2
    # ----------------------------------------------------------------------

    # For the baseline, let's only look at how SaO2 might predict arousals


    arousals_originale = this_data.get(['arousals']).values
    SaO2 = this_data.get(['ECG']).values
    recordLength = SaO2.size
    signals=None
    arousals=None
    if p_INPUT_FEAT == 1:
        SaO2 = instfreq(SaO2)
        SaO2 = scale(SaO2)
        #SaO2 = standardize(SaO2)
        signals, arousals = phyc.signalsToMatrix(SaO2, arousals_originale, recordLength, p_WINDOW_SIZE, p_INPUT_FEAT)
        print("signals shape:" + str(signals.shape))
    if p_INPUT_FEAT == 13:
        ECG = this_data.get(['ECG']).values
        print("ECG shape:"+str(ECG.shape))
        ECG = instfreq(ECG)
        print("ECG shape:" + str(ECG.shape))
        signals = this_data[
            ['F3-M2', 'F4-M1', 'C3-M2', 'C4-M1', 'O1-M2', 'O2-M1', 'E1-M2', 'Chin1-Chin2', 'ABD', 'CHEST', 'AIRFLOW',
            'SaO2']].values
        signals = np.append(signals,ECG, axis=1)

        #signals = instfreq(signals)
        signals = scale(signals)
        #signals = standardize(signals)
        print("signals shape:" + str(signals.shape))
        print("signals shape:" + str(signals[0:20,:]))
        signals, arousals = phyc.signalsToMatrix(signals, arousals_originale, recordLength, p_WINDOW_SIZE, p_INPUT_FEAT);
        #signals = signals.values


    return signals, arousals, recordLength, arousals_originale
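
# The helpers instfreq, scale and standardize are not shown in this listing.
# A minimal sketch of scale, under the assumption (stated in a comment further
# down) that inputs are mapped into the range -1 to 1, per channel:
def scale(signals):
    lo = signals.min(axis=0)
    hi = signals.max(axis=0)
    span = np.where(hi - lo == 0, 1.0, hi - lo)  # guard against flat channels
    return 2.0 * (signals - lo) / span - 1.0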
def score_training_set(model=None):

    try:
        os.mkdir('training_output')
    except OSError:
        pass

    # Generate a data frame that points to the challenge files
    tr_files, te_files = phyc.get_files()
    j = 0
    score = Challenge2018Score()
    for i in range(0, np.size(tr_files, 0)):
        gc.collect()
        sys.stdout.write('\nEvaluating training subject: %d/%d' %
                         (i + 1, np.size(tr_files, 0)))
        sys.stdout.flush()
        record_name = tr_files.header.values[i][:-4]
        predictions, pred_arousal_probabilities, model = T.classify_record(
            record_name, model)

        arousals = phyc.import_arousals(tr_files.arousal.values[i])
        # flatten into a 1D array
        arousals = np.ravel(arousals)

        print("arousals.shape: " + str(arousals.shape))
        print("predictions.shape: " + str(predictions.shape))
        print("pred_arousal_probabilities.shape: " +
              str(pred_arousal_probabilities.shape))

        print_arousal_predictions(arousals, pred_arousal_probabilities)

        score.score_record(arousals, pred_arousal_probabilities, record_name)
        auroc = score.record_auroc(record_name)
        auprc = score.record_auprc(record_name)
        L.log_info(' AUROC:%f AUPRC:%f' % (auroc, auprc))

        # save also training predictions to evaluate voting solutions between different models
        output_file = "training_output/" + os.path.basename(
            record_name) + '.vec'
        L.log_info("Salvo i files esito del training in " + str(output_file))
        np.savetxt(output_file, pred_arousal_probabilities, fmt='%.3f')

    print()
    auroc_g = score.gross_auroc()
    auprc_g = score.gross_auprc()
    L.log_info('Training AUROC Performance (gross): %f' % auroc_g)
    L.log_info('Training AUPRC Performance (gross): %f' % auprc_g)
    L.log_info("\n\r ")
    return model
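
# Sketch of the "voting solutions" idea mentioned above (hypothetical, not part
# of the original code): average the per-sample arousal probabilities that
# several models saved as .vec files for the same record.
def vote_average(vec_paths):
    stacked = np.stack([np.loadtxt(p) for p in vec_paths])  # one row per model
    return stacked.mean(axis=0)  # ensemble probability per sample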
Example #5
def loaddata(record_name):
    L.log_info("Loading record: " + str(record_name))
    header_file = record_name + '.hea'
    signal_file = record_name + '.mat'
    arousal_file = record_name + '-arousal.mat'
    # Get the signal names from the header file
    signal_names, Fs, n_samples = phyc.import_signal_names(header_file)
    signal_names = list(np.append(signal_names, 'arousals'))
    this_data = phyc.get_subject_data(arousal_file, signal_file, signal_names)
    SaO2 = this_data.get(['SaO2']).values
    arousals = this_data.get(['arousals']).values
    recordLength = SaO2.size
    #print(this_data)
    #print(this_data.values)
    signals = this_data[
        ['F3-M2', 'F4-M1', 'C3-M2', 'C4-M1', 'O1-M2', 'O2-M1', 'E1-M2', 'Chin1-Chin2', 'ABD', 'CHEST', 'AIRFLOW',
         'SaO2', 'ECG']]
    signals = signals.values
    arousals = arousals.astype(np.int32)
    return signals, arousals, recordLength, SaO2
def evaluate_test_set():
    # Generate a data frame that points to the challenge files
    tr_files, te_files = phyc.get_files()
    try:
        os.mkdir('test_output')
    except OSError:
        pass

    for f in glob.glob('models/*.vec'):
        os.remove(f)

    model = None  # assumed: T.classify_record loads the trained model when passed None
    for i in range(0, np.size(te_files, 0)):
        gc.collect()
        L.log_info('Evaluating test subject: %d/%d' %
                   (i + 1, np.size(te_files, 0)))
        record_name = te_files.header.values[i][:-4]
        output_file = "test_output/" + os.path.basename(record_name) + '.vec'
        L.log_info("Salvo i files esito del test in " + str(output_file))
        predictions, pred_arousal_probabilities, model = T.classify_record(
            record_name, model)
        #np.savetxt(output_file, predictions [:,1], fmt='%.3f')
        np.savetxt(output_file, pred_arousal_probabilities, fmt='%.3f')
Example #7
def model_eval(X, y, p_INPUT_FEAT, p_OUTPUT_CLASS, record_name, record_length):
    batch = p_BATCH_SIZE
    epochs = p_EPOCHS   # reduced from 120
    rep = 1             # the K-fold procedure can be repeated multiple times
    Kfold = 3           # ENRICO: reduced from 5
    #Ntrain = int(record_length/9000) #8528 # number of recordings in the training set
    Ntrain = int(record_length / p_WINDOW_SIZE)
    Nsamp = int(Ntrain / Kfold)  # number of recordings to take as validation  #/10
    X_train=X

    print("model eval x shape:" +str(X.shape))
    # Need to add dimension for training
    #X = np.expand_dims(X, axis=2)
    classes = ['1', '0', '-1']
    Nclass = len(classes)
    print("Nclass"+str(Nclass))

    # try adding class weights to counterbalance the scarcity of 1s in the records
    print("y " + str(y))
    y_ints = [yi.argmax() for yi in y]
    class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                      classes=np.unique(y_ints),
                                                      y=y_ints)
    # Keras expects class_weight as a dict mapping class index to weight
    class_weights = dict(enumerate(class_weights))

    #class_weights='auto'
    L.log_info("Class weights: " + str(class_weights))

    # ENRICO: the confusion matrix compares true vs. predicted values
    cvconfusion = np.zeros((Nclass, Nclass, Kfold * rep))
    cvscores = []
    counter = 0
    # repetitions of cross validation
    for r in range(rep):
        print("Rep %d" % (r + 1))
        # cross-validation loop
        for k in range(Kfold):
            print("Cross-validation run %d" % (k + 1))
            # Callbacks definition
            callbacks = [
                # Early stopping definition
                EarlyStopping(monitor='val_loss', mode='min', patience=5, verbose=1),
                # Decrease the learning rate on plateau (decayRatio=0.01)
                AdvancedLearnignRateScheduler(monitor='val_loss', patience=1, verbose=1, mode='min', decayRatio=0.01),
                # Saving best model
                ModelCheckpoint('models/' + p_MODEL_FILE + '_k{}_r{}.hdf5'.format(k, r), monitor='val_loss', mode='min', save_best_only=True, verbose=1),
                ModelCheckpoint('models/' + p_MODEL_FILE + '_best.hdf5', monitor='val_loss', mode='min',
                                save_best_only=True, verbose=1),
                CSVLogger('logs/' + p_KERAS_LOG_FILE, separator=',', append=False),
                TensorBoard(log_dir='tensorboard/' + str(p_TENSORBOARD_LOGDIR), histogram_freq=0, batch_size=32,
                            write_graph=True,
                            write_grads=False, write_images=False, embeddings_freq=0,
                            embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None),
                ]
            #print("loading model with window_size: "+str(WINDOW_SIZE))
            # Load model
            model = ResNet_model()

            try:
                model.load_weights('models/' + p_MODEL_FILE + '_k{}_r{}.hdf5'.format(k, r))
            except Exception:
                print("weights not found, starting from scratch")
            
            # split train and validation sets
            idxval = np.random.choice(Ntrain, Nsamp, replace=False)
            idxtrain = np.invert(np.in1d(range(X_train.shape[0]), idxval))
            ytrain = y[np.asarray(idxtrain), :]
            Xtrain = X[np.asarray(idxtrain), :, :]
            Xval = X[np.asarray(idxval), :, :]
            yval = y[np.asarray(idxval), :]

            # Train model
            #model.fit(Xtrain, ytrain,
            model.fit(X, y,
                      validation_data=(Xval, yval),
                      epochs=epochs, batch_size=batch, class_weight=class_weights, callbacks=callbacks)
            
            # Evaluate best trained model
            """model.load_weights('models/'+p_MODEL_FILE+'_k{}_r{}.hdf5'.format(k,r))
            #enrico addedd  https://github.com/matterport/Mask_RCNN/issues/588
            model._make_predict_function()
            #ypred = model.predict(Xval)
            ypred = model.predict(X)
            yval=y
            #print("yval SHAPE" + str(yval.shape))
            #print ("ypred SHAPE"+str(ypred.shape)  )

            #confronto la previsione della colonna 0 (quella con  valore 1)
            arousals=yval[:,0]
            predictions=ypred[:,0]
            """
            K.clear_session()
            gc.collect()
            config = tf.ConfigProto()
            config.gpu_options.allow_growth=True            
            sess = tf.Session(config=config)
            K.set_session(sess)
            counter += 1

    # Saving cross-validation results
    #scipy.io.savemat('xval_results.mat', mdict={'cvconfusion': cvconfusion.tolist()})
    return model  # ,arousals, predictions
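
# The loop above draws a fresh random validation subset each fold, so folds may
# overlap. A disjoint alternative (a sketch, swapping in scikit-learn's KFold):
from sklearn.model_selection import KFold

def kfold_indices(X, n_splits=3):
    kf = KFold(n_splits=n_splits, shuffle=True)
    for idxtrain, idxval in kf.split(X):
        yield idxtrain, idxval  # disjoint validation folds across the runs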
Example #8
def preprocess_record(record_name, model):
    signals,arousals, recordLength,SaO2 = loaddata(record_name)

    # Ignore records that do not contain any arousals
    if 1 not in arousals:
        L.log_info('no arousals found in %s\n' % record_name)
        return

    # We select a window size of 60 seconds with no overlap to compute
    # the features
    step = p_WINDOW_SIZE
    window_size = p_WINDOW_SIZE
    n_samples = recordLength

    # Initialize the matrices that store our training data
    X_subj = np.zeros([n_samples // step, 1])
    Y_subj = np.zeros([n_samples // step, 1])

    for idx, k in enumerate(range(0, n_samples - step + 1, step)):
        X_subj[idx, 0] = np.var(np.transpose(SaO2[k:k + window_size]), axis=1)
        Y_subj[idx] = np.max(arousals[k:k + window_size])

    # scale the input signals into the range -1 to 1
    #signals = scale(signals)
    #x=signals.reshape(1, recordLength, 13)
    #x=strided_axis0_backward(signals,p_WINDOW_SIZE)
    """y=arousals
    print("x shape: "+ str(x.shape))
    print("y shape: "+ str(y.shape))

    # Convert y2 to dummy variables
    y2 = np.zeros((y.shape[0], p_OUTPUT_CLASS), dtype=np.float32)

    index=0;
    for yi in np.nditer(y):
        if yi == 0:
            y2[index, 0] = 1  # print("messo a zero")
        if yi == 1:
            y2[index, 1] = 1
        if yi == -1:
            y2[index, 2] = 1  # print("messo a  - uno")
        index = index + 1

    #y2[np.arange(y.shape[0]), y] = 1.0
    print("y2 shape: "+ str(y2.shape))
   # print("x:  "+str(x))
    #print("y:  "+str(y))
    #print("y2:  "+str(y2))

    y2sum= y2.sum(axis=0)

    print("y2 sum: "+ str(y2sum))
    """
    callbacks = [
        # Early stopping definition
        # EarlyStopping(monitor='val_loss', patience=3, verbose=1),
        EarlyStopping(monitor='val_loss', patience=3, verbose=1),
        # Decrease learning rate by 0.1 factor
        AdvancedLearnignRateScheduler(monitor='val_loss', patience=3, verbose=1, mode='auto', decayRatio=0.1),
        # Saving best model
        ModelCheckpoint('models/'+str(os.path.basename(record_name))+str(p_MODEL_FILE), monitor='val_loss', save_best_only=True,
                        verbose=1),
        CSVLogger('logs/'+p_KERAS_LOG_FILE, separator=',', append=False),
        TensorBoard(log_dir='tensorboard/'+str(p_TENSORBOARD_LOGDIR), histogram_freq=0, batch_size=32, write_graph=True,
                                    write_grads=False, write_images=False, embeddings_freq=0,
                                    embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None)
    ]
    if (model is None):
        model =LogisticRegressionKeras()

    # split train and validation sets

    """idxval_start = np.random.randint(np.trunc(recordLength/2), size=1)[0]
    idxval_size = np.random.randint(np.trunc(recordLength / 2), size=1)[0]
    #print(idxval_start)
    #print(idxval_size)
    Xval = x[idxval_start:idxval_start+idxval_size, :, :]
    Yval = y2[idxval_start:idxval_start + idxval_size, :]
    """
    try:
        model.load_weights('models/' + str(os.path.basename(record_name)) + str(p_MODEL_FILE))
    except Exception as e:
        print("weights not found (file " + str(p_MODEL_FILE) + "), starting from scratch")
        print(str(e))
    print('Train...')
    model.fit(X_subj, np.ravel(Y_subj), validation_data=(X_subj, np.ravel(Y_subj)),
              epochs=p_EPOCHS, batch_size=p_BATCH_SIZE, callbacks=callbacks)  #200
    #model.fit(x,y2, validation_data=(Xval,Yval),epochs=p_EPOCHS,batch_size=p_BATCH_SIZE, callbacks=callbacks) #200
    """
    pred = model.predict(x)
    predict_classes = np.argmax(pred,axis=1)
    predict_classes[predict_classes == 2] = -1

    #print("Predicted classes: {}",predict_classes)
    #print("Expected classes: {}",y)
    print("Predicted sum: {}",predict_classes.sum(axis=0))
    print("Expected sum: {}",y.sum(axis=0))
    """
    try:
        model.save_weights('models/' + str(os.path.basename(record_name)) + str(p_MODEL_FILE))
    except Exception:
        L.log_info("could not save the weights")
    return model
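
# Hypothetical sketch of LogisticRegressionKeras(): a single-feature logistic
# regression in Keras, matching the (n_windows, 1) variance feature built above.
from keras.models import Sequential
from keras.layers import Dense

def LogisticRegressionKeras():
    model = Sequential()
    model.add(Dense(1, input_dim=1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model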
def preprocess_record(record_name, model):
    signals, arousals, recordLength = loaddata(record_name)

    # Ignore records that do not contain any arousals
    if 1 not in arousals:
        L.log_info('no arousals found in %s\n' % record_name)
        return

    # scale the input signals into the range -1 to 1
    signals = scale(signals)
    #x=signals.reshape(1, recordLength, 13)
    x = strided_axis0_backward(signals, p_WINDOW_SIZE)
    y = arousals

    # use partial overlap to reduce training time
    overlapping = p_WINDOW_SIZE
    x = x[1::overlapping, :, :]
    y = y[1::overlapping, :]
    print("x shape: " + str(x.shape))
    print("y shape: " + str(y.shape))

    # Convert y to one-hot dummy variables (y2)
    y2 = np.zeros((y.shape[0], p_OUTPUT_CLASS), dtype=np.float32)

    index = 0
    for yi in np.nditer(y):
        if yi == 0:
            y2[index, 0] = 1  # print("set to zero")
        if yi == 1:
            y2[index, 1] = 1
        if yi == -1:
            y2[index, 2] = 1  # print("set to minus one")
        index = index + 1

    #y2[np.arange(y.shape[0]), y] = 1.0
    print("y2 shape: " + str(y2.shape))
    # print("x:  "+str(x))
    #print("y:  "+str(y))
    #print("y2:  "+str(y2))
    #y2 = y2.reshape(11925, 1, 3)

    y2sum = y2.sum(axis=0)

    print("y2 sum: " + str(y2sum))

    # try adding class weights to counterbalance the scarcity of 1s in the records

    unique, counts = np.unique(arousals, return_counts=True)
    print("arousals value counts: " + str(dict(zip(unique, counts))))

    categorical_labels = to_categorical(arousals, num_classes=None)
    y = categorical_labels
    print("y " + str(y))
    y_ints = [yi.argmax() for yi in y2]

    unique, counts = np.unique(y_ints, return_counts=True)
    print("y_ints value counts: " + str(dict(zip(unique, counts))))
    class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                      classes=np.unique(y_ints),
                                                      y=y_ints)
    # Keras expects class_weight as a dict mapping class index to weight
    class_weights = dict(enumerate(class_weights))

    # class_weights='auto'
    L.log_info("Class weights: " + str(class_weights))

    callbacks = [
        # Early stopping definition
        EarlyStopping(monitor='loss', patience=3, verbose=1),
        # Decrease learning rate by 0.1 factor
        #AdvancedLearnignRateScheduler(monitor='loss', patience=1, verbose=1, mode='auto', decayRatio=0.1),
        # Saving best model
        ModelCheckpoint('models/' + str(p_MODEL_FILE),
                        monitor='loss',
                        save_best_only=True,
                        verbose=1),
        CSVLogger('logs/' + p_KERAS_LOG_FILE, separator=',', append=False),
        TensorBoard(log_dir='tensorboard/' + str(p_TENSORBOARD_LOGDIR),
                    histogram_freq=0,
                    batch_size=32,
                    write_graph=True,
                    write_grads=False,
                    write_images=False,
                    embeddings_freq=0,
                    embeddings_layer_names=None,
                    embeddings_metadata=None,
                    embeddings_data=None)
    ]
    if (model is None):
        #model =LSTM_model()
        model = Bi_LSTM_model()

    # split train and validation sets

    # indices must be drawn from the number of windows in x, not the raw record length
    idxval_start = np.random.randint(x.shape[0] // 2, size=1)[0]
    idxval_size = np.random.randint(x.shape[0] // 2, size=1)[0]
    #print(idxval_start)
    #print(idxval_size)
    Xval = x[idxval_start:idxval_start + idxval_size, :, :]
    Yval = y2[idxval_start:idxval_start + idxval_size, :]
    print('Train...')
    model.fit(x,
              y2,
              validation_data=(Xval, Yval),
              epochs=p_EPOCHS,
              class_weight=class_weights,
              batch_size=p_BATCH_SIZE,
              callbacks=callbacks)  #200
    pred = model.predict(x)
    predict_classes = np.argmax(pred, axis=1)
    predict_classes[predict_classes == 2] = -1

    #print("Predicted classes: {}",predict_classes)
    #print("Expected classes: {}",y)
    print("Predicted sum: {}", predict_classes.sum(axis=0))
    print("Expected sum: {}", y.sum(axis=0))

    try:
        model.save_weights('models/' + str(p_MODEL_FILE))
    except Exception:
        L.log_info("could not save the weights")

    return model
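
# strided_axis0_backward is not shown in this listing. A minimal sketch under
# the assumption that it yields, for each time step, the window of the previous
# p_WINDOW_SIZE samples across all channels (requires NumPy >= 1.20):
def strided_axis0_backward(a, window):
    n, c = a.shape
    # sliding_window_view returns shape (n - window + 1, 1, window, c);
    # dropping the singleton axis gives one (window, c) block per time step
    return np.lib.stride_tricks.sliding_window_view(a, (window, c))[:, 0]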
def package_entry():
    # Assumed wrapper: the original function header and zipfile open were
    # truncated above; `import zipfile` is assumed at module level and the
    # archive name is illustrative.
    with zipfile.ZipFile('entry.zip', 'w') as myzip:
        for dirName, subdirList, fileList in os.walk('.'):
            for fname in fileList:
                if ('.vec' in fname[-4:] or '.py' in fname[-3:]
                        or '.pkl' in fname[-4:] or '.txt' in fname[-4:]
                        or '.log' in fname[-4:] or '.hdf5' in fname[-5:]):
                    myzip.write(os.path.join(dirName, fname))


# -----------------------------------------------------------------------------
#    MAIN FUNCTION
# -----------------------------------------------------------------------------
if __name__ == '__main__':
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    L.init_logger(T.p_LOG_FILE)
    dtInizioElaborazione = datetime.datetime.now()
    L.log_info("Execution starts at: " + str(dtInizioElaborazione))
    try:
        L.log_info(
            "############################## TRAIN ##############################"
        )
        train()
        L.log_info(
            "###################### SCORE TRAINING SET #########################"
        )
        score_training_set()
        L.log_info(
            "###################### EVALUATE TEST SET  #########################"
        )
        evaluate_test_set()
        L.log_info(
            "####################### PACKAGE ENTRY #############################"