Example #1
def write_dataset_to_file(filename, sig_type, length):

    """
    Write a preprocessed dataset with a given segment length to a given file.
    The file format is one row of space-separated values for each point.

    :param filename: the file's name.
    :param sig_type: the signal type (e.g., "ABP").
    :param length: the number of samples each segment is made of.

    :return: nothing
    """

    ECG_lists = collections.defaultdict(list)

    waveforms = get_waveforms_list()

    for cpair in waveforms:

        folder = cpair[0]
        record = cpair[1]

        record_header = wfdb.rdheader(record, pbdir='mimic3wdb/{}/{}/'.format(folder, record))
        segments = record_header.segname

        for segment in segments:
            if is_segment(segment):
                # Read segment's header file.
                print(segment)
                # TODO: change to read content.
                segment_content = wfdb.rdheader(segment, pbdir='mimic3wdb/{}/{}/'.format(folder, record))

                # TODO: preprocess and slice on the go; there is no space for the whole thing.
                if sig_type == "ABP":
                    pass
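
Note: the `pbdir` keyword and the `segname` attribute above come from wfdb-python 1.x; later releases renamed them (`pb_dir` in 2.x, `pn_dir` in 3.x; `segname` -> `seg_name`, `siglen` -> `sig_len`, `signame` -> `sig_name`). A minimal sketch of the equivalent header read against a current wfdb, with the record and remote directory as placeholders:

import wfdb

# placeholder record/directory pair following the MIMIC-III waveform DB layout on PhysioNet
header = wfdb.rdheader('3000003', pn_dir='mimic3wdb/30/3000003')
print(header.seg_name)  # segment names of the multi-segment record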
Example #2
def extract_labs(infile):
    """
    extract_labs(infile)

    Fetch all lab values in the MIMIC-III db for infile.
    
    TODO: Will need to build option to include only labs/notes in the period where there is waveform/numeric data
    but for now we include everything so it is available for context (eg echo reports)

    Parameters
    ----------

    infile: string
        filename of a wfdb file from the MIMIC3 matched dataset

    return: labs
        DataFrame containing lab values, times, etc.
    """

    # get patient ID
    subj_id = patient_id_from_file(infile)

    # get base time of the signal file
    origin = pd.to_datetime(wfdb.rdheader(infile).base_datetime)

    #get lab_events for this patient
    con = open_db()

    query = \
    """
    SELECT e.charttime, e.itemid, e.value, e.valuenum, e.valueuom, e.flag,
        i.label, i.fluid, i.category, i.loinc_code
    FROM labevents e
    INNER JOIN d_labitems i
    ON e.itemid = i.itemid
    WHERE subject_id = {};
    """.format(subj_id)
    labs = pd.read_sql_query(query, con)

    # convert chart times to seconds relative to the signal start
    labs.insert(0, 'time', '')

    for idx, row in labs.iterrows():
        labs.loc[idx, 'time'] = int(
            (pd.to_datetime(row['charttime']) - origin).total_seconds())
    del labs['charttime']

    return labs
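
The per-row loop above works but is slow on large result sets; a vectorized equivalent (a sketch, assuming the same frame layout) would be:

# vectorized version of the time conversion (illustrative)
labs['time'] = ((pd.to_datetime(labs['charttime']) - origin)
                .dt.total_seconds().astype(int))
labs = labs.drop(columns=['charttime'])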
Example #3
    def extract_labels(self, filepath):
        p_index = -1
        # if self.args.database == "PTB" or self.args.database == "ECG-ID":
        for folders in os.listdir(filepath):
            if (folders.startswith('Person_')
                    or folders.startswith('patient')):
                p_index += 1
                self.persons_labels.append(folders)

                if self.args.database == "PTB":
                    for onepersonsdir in os.listdir(
                            os.path.join(filepath, folders)):
                        if onepersonsdir.endswith('hea'):
                            ecg_record = wfdb.rdheader(
                                os.path.join(filepath, folders,
                                             onepersonsdir.split(".", 1)[0]))
                            patient_status = ecg_record.comments[4].split(
                                ":")[1].strip()
                            self.health_labels.append(patient_status)
                            break  # only read one .hea file per person to determine the health status
                # if (onepersonsdir.startswith('rec_1.') and onepersonsdir.endswith('hea')):
                #     with open(os.path.join(filepath, folders, onepersonsdir),"r") as f:
                #         array2d = [[str(token) for token in line.split()] for line in f]
                #         self.age_labels.append(array2d[4][2])
                #         self.gender_labels.append(array2d[5][2])
                #         self.date_labels.append(array2d[6][3])
                #     f.close()
            if (folders.endswith('csv')):  # This is for MIT-BIH database
                p_index += 1
                basename = folders.split(".", 1)[0]  # rec_1 rec_2....
                self.persons_labels.append(
                    basename)  # no classify to person1 person2 dir
Example #4
def writePatientData():
    # write one CSV row per record: name, path, attributes, first diagnosis
    with open('../ptbdb/RECORDS', 'r') as f, \
            open('../out/patientAttr.txt', 'w') as o:
        for line in f:
            pat = line.strip()
            rfile = '../ptbdb/' + pat
            record = wfdb.rdheader(rfile)
            diagnoses = rutil.extratPatientDiagnoses(record)
            attributes = rutil.extractPatientAttributes(record)
            o.write(pat)
            o.write(',')
            o.write(rfile)
            for attr in attributes:
                o.write(',')
                o.write(attr)
            o.write(',')
            o.write(diagnoses[0])
            o.write('\n')


# record = wfdb.rdsamp('../ptbdb/patient002/s0015lre')
# rutil.showGraph(record)

#writeAllDiagnosisToFile()

#rman.separateRecords()
Example #5
    def _load_header(self, rec:str) -> dict:
        """ finished, checked,

        load header data into a dict

        Parameters:
        -----------
        rec: str,
            name of the record

        Returns:
        --------
        header_dict: dict,
        """
        header_dict = ED({})
        rec_fp = os.path.join(self.db_dir, rec)
        header_reader = wfdb.rdheader(rec_fp)
        header_dict['units'] = header_reader.units
        header_dict['baseline'] = header_reader.baseline
        header_dict['adc_gain'] = header_reader.adc_gain
        header_dict['record_fmt'] = header_reader.fmt
        try:
            header_dict['age'] = int([l for l in header_reader.comments if '<age>' in l][0].split(': ')[-1])
        except Exception:
            header_dict['age'] = np.nan
        try:
            header_dict['sex'] = [l for l in header_reader.comments if '<sex>' in l][0].split(': ')[-1]
        except Exception:
            header_dict['sex'] = ''
        d_start = [idx for idx, l in enumerate(header_reader.comments) if '<diagnoses>' in l][0] + 1
        header_dict['diagnoses'] = header_reader.comments[d_start:]
        return header_dict
Example #6
def read_records(dataset_name, data_path, sample_size_seconds=30, samples_per_second=250, num_records=None):
    samples = []
    labels = []
    total_read_records = 0
    for record_name in wfdb.get_record_list(dataset_name):
        header = wfdb.rdheader(data_path + record_name)

        if header.sig_len == 0:
            continue

        offset = 0
        samples_count = 0
        while True:
            record, ann, offset = read_record(data_path, header, offset, sample_size_seconds, samples_per_second)
            if record is None:
                break
            samples.append(record)
            labels.append(ann.aux_note)
            samples_count += 1

        total_read_records += 1
        if num_records is not None and total_read_records == num_records:
            break

    labels = np.array([1 if '(AFIB' in key else 0 for key in labels])
    return samples, labels
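
A hedged usage sketch for read_records: `read_record` is a helper defined elsewhere in this project, and the dataset name, path, and record count below are assumptions for illustration:

# slice the first five records of the MIT-BIH AF database into 30 s windows
samples, labels = read_records('afdb', 'data/afdb/', sample_size_seconds=30,
                               samples_per_second=250, num_records=5)
print(len(samples), 'windows,', labels.sum(), 'labeled AFIB')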
Example #7
    def _load_header(self, rec:str) -> dict:
        """ finished, checked,

        load header data into a dict

        Parameters
        ----------
        rec: str,
            name of the record

        Returns
        -------
        header_dict: dict,
        """
        header_dict = ED({})
        rec_fp = os.path.join(self.db_dir, rec)
        header_reader = wfdb.rdheader(rec_fp)
        header_dict["units"] = header_reader.units
        header_dict["baseline"] = header_reader.baseline
        header_dict["adc_gain"] = header_reader.adc_gain
        header_dict["record_fmt"] = header_reader.fmt
        try:
            header_dict["age"] = int([l for l in header_reader.comments if "<age>" in l][0].split(": ")[-1])
        except Exception:
            header_dict["age"] = np.nan
        try:
            header_dict["sex"] = [l for l in header_reader.comments if "<sex>" in l][0].split(": ")[-1]
        except Exception:
            header_dict["sex"] = ""
        d_start = [idx for idx, l in enumerate(header_reader.comments) if "<diagnoses>" in l][0] + 1
        header_dict["diagnoses"] = header_reader.comments[d_start:]
        return header_dict
Example #8
def find_admission(filename):
    '''
    Get admission information from MIMIC III filename (contains subj ID)
    Return demographic information
    '''
    # defaults, in case no admission window matches the signal start time
    subj_id = 0
    hadm_id = 0
    age = np.nan
    diagnosis = ''
    expired = ''
    death_time = ''
    ethnicity = ''

    subj_id = patient_id_from_file(filename)
    print('searching {}'.format(subj_id))

    # get additional demographics
    con = open_db()
    query = \
    """
    SELECT i.subject_id, i.gender, i.dob 
    FROM patients i
    WHERE subject_id = {};
    """.format(subj_id)

    demo = pd.read_sql_query(query, con)
    gender = demo.gender.values[0]
    dob = demo.dob.values[0]

    record = wfdb.rdheader(filename)
    sig_start = record.base_datetime
    print('Signal file start {}'.format(sig_start))

    admits = get_admissions(subj_id)

    for idx, row in admits.iterrows():
        adm_time = row['admittime']
        dsc_time = row['dischtime']

        print('Admission # {}, in at {} out at {}'.format(
            row['hadm_id'], adm_time, dsc_time))

        if (sig_start > adm_time) and (sig_start < dsc_time):
            print('Subject {}, record {}, diagnosis: {}. HADM {} '.format(
                row['subject_id'], record.record_name, row['diagnosis'],
                row['hadm_id']))
            hadm_id = row['hadm_id']
            diagnosis = row['diagnosis']
            expired = row['hospital_expire_flag']
            death_time = row['deathtime']
            ethnicity = row['ethnicity']
            age = round(
                pd.to_timedelta(adm_time - dob) /
                pd.to_timedelta(365, unit='d'))

    return (subj_id, hadm_id, age, gender, ethnicity, diagnosis, expired,
            death_time)
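
One caveat: in MIMIC-III, dates of birth of patients older than 89 are shifted far into the past for de-identification, so the age computed above can come out near 300. An illustrative guard (not part of the source):

# clamp de-identified ages; MIMIC-III shifts DOB for patients > 89
if age > 89:
    age = 90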
Example #9
def rdann_by_type(
    rec_path: str,
    ann_ext: str,
    from_time: str = None,
    to_time: str = None,
    types: str = WFDB_ANN_ALL_PEAK_TYPES,
):
    """
    Reads WFDB annotation file and returns annotations of specific types.
    :param rec_path: Record path (without extension).
    :param ann_ext: extension of annotation file to load.
    :param from_time: Start time. A string in PhysioNet time format [1]_.
    :param to_time: End time.  A string in PhysioNet time format [1]_.
    :param types: A string of chars of the annotation types to find (see [2]_).
    :return: A dictionary, mapping from the annotation type (a
    char) to a numpy array of indices in the signal.

    .. [1] https://www.physionet.org/physiotools/wag/intro.htm#time
    .. [2] https://www.physionet.org/physiobank/annotations.shtml
    """
    if not is_record(rec_path, ann_ext=ann_ext):
        raise ValueError(f"Can't find record {rec_path}")

    ann_to_idx = {ann_type: [] for ann_type in types}

    # In case it's a Path object; wfdb can't handle that
    rec_path = str(rec_path)

    # Handle from/to by converting to samples
    sampfrom, sampto = 0, sys.maxsize
    if from_time is not None or to_time is not None:
        header = wfdb.rdheader(rec_path)
        if from_time is not None:
            sampfrom = wfdb_time_to_samples(from_time, header.fs)
        if to_time is not None:
            sampto = wfdb_time_to_samples(to_time, header.fs)

    # Read annotations
    ann = wfdb.rdann(rec_path, ann_ext, sampfrom, sampto)

    # Find annotations of requested type
    annotations_pattern = re.compile(fr"[{types}]")
    joined_ann = str.join("", ann.symbol)
    matches = list(annotations_pattern.finditer(joined_ann))

    for m in matches:
        ann_type = m.group()
        ann_idx = m.start()

        # Save annotation sample
        ann_to_idx[ann_type].append(ann.sample[ann_idx])

    return {
        ann_type: np.array(idxs)
        for (ann_type, idxs) in ann_to_idx.items()
    }
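
A usage sketch, assuming a local copy of an annotated record; the path and the 'atr' annotator extension are placeholders:

# map beat types to sample indices for a local MIT-BIH record (illustrative)
peaks = rdann_by_type('data/mitdb/100', ann_ext='atr', types='NV')
print(len(peaks['N']), 'normal beats,', len(peaks['V']), 'ventricular beats')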
Example #10
def header_reader(potassium_df=potassium_df):
    from datetime import datetime as dt

    #    for i in tqdm(range(len(potassium_df))):
    for i in range(100):
        charttime = dt.strptime(potassium_df.iloc[i]['CHARTTIME'],
                                '%Y-%m-%d %H:%M:%S')
        target_file = pd.DataFrame(index=[], columns=['file', 'diff'])
        for j in range(58):
            file = potassium_df.iloc[i][str(j)]

            if file is np.nan:
                continue

            filename = os.path.join(potassium_df.iloc[i]['dir'], file)
            filename = filename[:-4]
            record = wfdb.rdheader(filename)
            sig_len = record.sig_len
            date_time = record.base_datetime
            fs = record.fs
            # match the potassium measurement time against the ECG start time
            # (note: .seconds drops the day component; .total_seconds() may be intended)
            diff_time = date_time - charttime
            pot_len = diff_time.seconds * fs
            #print(i,j)
            #print(charttime)
            #print(pot_len)
            if (pot_len > sig_len):  # if it does not fit anywhere in the header, continue to save time
                continue

            # build a table of each file's name and sequence length from the master header file
            header_table_df = pd.DataFrame(np.vstack(
                [record.seg_name, record.seg_len]).T,
                                           columns=['file', 'sequence'])
            #header_table_df=header_table_df.query('file.str.match("[0-9]{7}_[0-9]{4}")')
            header_table_df['cumsum'] = header_table_df['sequence'].astype(
                int).cumsum()
            #header_table_df.query('cumsum-pot_len>0').iloc[0]  # pick out the file corresponding to the potassium measurement time
            header_table_df['diff'] = header_table_df['cumsum'] - pot_len
            # target_file records the file name matching the potassium measurement time and the elapsed sequence length at that time
            if len(target_file.index) == 1:
                print('{},{}: target file already exists!!'.format(i, j))

            target_file = header_table_df.query('diff>0')

            if len(target_file.index) == 0:
                continue
            elif len(target_file.index) == 1:
                #print('---')
                target_file = target_file.iloc[0][['file', 'diff']]
                #print(i,j)
                #print(target_file)
                #print('test')
        potassium_df.loc[i, 'file'] = target_file['file']
        potassium_df.loc[i, 'diff'] = target_file['diff']

    return potassium_df
Example #11
def ecgrr(
    rec_path,
    ann_ext=None,
    channel=None,
    from_time=None,
    to_time=None,
    detector=qrs.ecgpuwave_detect_rec,
    dtype=np.float32,
):
    """
    Returns an RR-interval time-series given a PhysioNet record.
    :param rec_path: The path to the record (without any file extension).
    :param ann_ext: Extension of annotation file to use. If provided,
    R-peaks will be read from this annotation file instead of performing
    peak-detection.
    :param channel: Number of ECG channel in the record. Will be
    heuristically estimated if missing.
    :param from_time: Start time. A string in the PhysioNet time format.
    :param to_time: End time. A string in the PhysioNet time format.
    :param detector: A function to use for peak-detection. Will only be used if
    the ann_ext parameter was not provided.
    :param dtype: Desired dtype of output tensors.
    :return: Tuple of time axis and interval durations.
    """
    if not utils.is_record(rec_path, ann_ext=ann_ext):
        raise ValueError(f"Can't find record {rec_path}")

    if ann_ext is not None:
        # Load r-peaks from annotation
        ann_type = "N"
        ann = utils.rdann_by_type(rec_path,
                                  ann_ext,
                                  from_time,
                                  to_time,
                                  types=ann_type)
        sample_idxs = ann[ann_type]
    else:
        # Calculate r-peaks using a peak-detector
        sample_idxs = detector(rec_path,
                               channel=channel,
                               from_time=from_time,
                               to_time=to_time)

    header = wfdb.rdheader(rec_path)
    fs = float(header.fs)

    start_time = sample_idxs[0] / fs
    rri = np.diff(sample_idxs) / fs

    trr = np.empty_like(rri)
    np.cumsum(rri[0:-1], out=trr[1:])
    trr[0] = 0.0
    trr += start_time

    return trr.astype(dtype), rri.astype(dtype)
Example #12
def get_wf_header(stay_id: str, pn_dir: str):
    wf = None
    sleep_period = 1
    while not wf:
        try:
            wf = wfdb.rdheader(stay_id, pn_dir=pn_dir)
        except (ConnectionError, MaxRetryError,
                requests.exceptions.ConnectionError):
            time.sleep(sleep_period)
            sleep_period *= 2
    return wf
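
The loop above retries forever with exponential backoff; a variant with a retry cap (a sketch, not from the source) fails loudly instead of hanging indefinitely:

def get_wf_header_capped(stay_id: str, pn_dir: str, max_tries: int = 6):
    # same idea as get_wf_header, but give up after max_tries attempts
    sleep_period = 1
    for attempt in range(max_tries):
        try:
            return wfdb.rdheader(stay_id, pn_dir=pn_dir)
        except (ConnectionError, MaxRetryError,
                requests.exceptions.ConnectionError):
            if attempt == max_tries - 1:
                raise
            time.sleep(sleep_period)
            sleep_period *= 2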
Example #13
def pickle_ECG_headers():

    """
    Scan the dataset to build a list of Segments (containing length, record and types).
    Then pickle it to "mimicIII-headers-list.pickle".

    This is done through dataset header files.
    Useful fields:
        siglen - integer signal length in terms of samples (there are 125 samples per second, as 'fs' can testify)
        signame - list of signal types. Accepted ECG types are stored in a dictionary.
        segname - list of segment names, contained in a record's header.

    :return: nothing.
    """

    ECG_types = {"I", "II", "III", "AVR", "AVL", "AVF", "V", "V1", "V2", "V3", "V4", "V5", "MLI", "MLII", "MLIII", "ABP"}
    ECG_list = []

    waveforms = get_waveforms_list()

    for cpair in waveforms:

        folder = cpair[0]
        record = cpair[1]

        record_header = wfdb.rdheader(record, pbdir='mimic3wdb/{}/{}/'.format(folder, record))
        segments = record_header.segname

        for segment in segments:
            if is_segment(segment):
                # Read segment's header file.
                print(segment)
                segment_header = wfdb.rdheader(segment, pbdir='mimic3wdb/{}/{}/'.format(folder, record))
                # Append signal lengths.
                ctypes = set(segment_header.signame)
                cintersection = ctypes.intersection(ECG_types)
                if bool(cintersection):  # If nonempty intersection.
                    ECG_list.append(Segment(segment_header.siglen, int(record), cintersection))

    with open('mimicIII-headers-list.pickle', 'wb') as file:
        pickle.dump(ECG_list, file)
Example #14
def find_ecg_channel(rec_path):
    """
    Heuristically finds the index of the first ECG channel in a record.
    :param rec_path: Path to record without extension.
    :return: The index of the first ECG channel.
    """
    pattern = re.compile(ECG_CHANNEL_PATTERN, re.IGNORECASE)
    header = wfdb.rdheader(rec_path)
    for i, name in enumerate(header.sig_name):
        if pattern.match(name):
            return i
    return None
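
`ECG_CHANNEL_PATTERN` is defined elsewhere in this project; a plausible stand-in and usage, both assumptions for illustration:

# illustrative pattern covering common ECG lead names; the project's actual pattern may differ
ECG_CHANNEL_PATTERN = r'ECG|lead|MLI+|aV[RLF]|^V\d?$|^I{1,3}$'

channel = find_ecg_channel('data/mitdb/100')
if channel is not None:
    print('first ECG channel index:', channel)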
Example #15
    def _get_channels_to_read(self, rec_name):
        header = wfdb.rdheader(rec_name)

        matching_channels = [
            chan_idx for chan_idx, chan_name in enumerate(header.sig_name)
            if self.channel_pattern.search(chan_name) is not None
        ]

        if self.first_channel_only and len(matching_channels) > 0:
            return matching_channels[0:1]
        else:
            return matching_channels
Example #16
    def load_beat_ann(self,
                      rec: str,
                      sampfrom: Optional[int] = None,
                      sampto: Optional[int] = None,
                      keep_original: bool = False) -> Dict[str, np.ndarray]:
        """ finished, checked,

        load beat annotations,
        which are stored in the `symbol` attribute of corresponding annotation files
        
        Parameters:
        -----------
        rec: str,
            name of the record
        sampfrom: int, optional,
            start index of the annotations to be loaded
        sampto: int, optional,
            end index of the annotations to be loaded
        keep_original: bool, default False,
            if True, indices will stay the same as in the annotation file,
            otherwise `sampfrom` is subtracted if specified
        
        Returns:
        --------
        ann, dict,
            locations (indices) of all the beat types ("A", "N", "Q", "V")
        """
        fp = os.path.join(self.db_dir, rec)
        header = wfdb.rdheader(fp)
        sig_len = header.sig_len
        sf = sampfrom or 0
        st = sampto or sig_len
        assert st > sf, "`sampto` should be greater than `sampfrom`!"

        wfdb_ann = wfdb.rdann(
            fp,
            extension=self.manual_ann_ext,
            sampfrom=sampfrom or 0,
            sampto=sampto,
        )
        ann = ED({k: [] for k in self.all_beat_types})
        for idx, bt in zip(wfdb_ann.sample, wfdb_ann.symbol):
            if bt not in self.all_beat_types:
                continue
            ann[bt].append(idx)
        if not keep_original and sampfrom is not None:
            ann = ED(
                {k: np.array(v, dtype=int) - sampfrom
                 for k, v in ann.items()})
        else:
            ann = ED({k: np.array(v, dtype=int) for k, v in ann.items()})
        return ann
Example #17
def readWFBDBasicInfo(path):
    '''
    Print basic signal information for the file at the given path;
    nothing else.
    '''
    HDR = wfdb.rdheader(path)
    print("channel \t sig_name \t Unit \t baseline \t adc_zero")
    print("===============================================")
    for i in range(HDR.n_sig):
        print(" {} \t {} \t {} \t {} \t {} ".format(i, HDR.sig_name[i],
                                                    HDR.units[i],
                                                    HDR.baseline[i],
                                                    HDR.adc_zero[i]))
Example #18
def read_record(path):
    hdr = wfdb.rdheader(path)

    if hdr.sig_len < max_block_size:
        data, rate = soundfile.read(path + '.flac',
                                    dtype='int16',
                                    always_2d=True)
    else:
        data = _read_blocks(hdr, path)

    hdr.p_signal = to_physical(data, hdr)

    return hdr
Example #19
def writeAllDiagnosisToFile():
    # collect all distinct diagnoses contained within this database
    possible = []
    with open('../ptbdb/RECORDS', 'r') as f:
        for line in f:
            rfile = '../ptbdb/' + line.strip()
            record = wfdb.rdheader(rfile)
            diagnosis = rutil.extratPatientDiagnoses(record)
            if not any(diagnosis in s for s in possible):
                possible.append(diagnosis)

    with open('../out/possible-diagnoses.txt', 'w') as diagnosesfile:
        for d in possible:
            diagnosesfile.write(d + '\n')
Example #20
class Record():
    def __init__(self, record_dir: Path, case: str):
        reco = wfdb.rdrecord(str(record_dir))
        head = wfdb.rdheader(str(record_dir))
        self.record_dir = record_dir
        self.case = case
        self.name = head.record_name
        self.time = head.base_time
        self.date = head.base_date
        self.fs = reco.fs
        self.slen = reco.sig_len
        self.n_sig = reco.n_sig
        self.sig_names = reco.sig_name
        self.units = reco.units
        self.rr = None
Example #21
def read(record_name):
    """
    Read a record and the pH
    """

    import wfdb

    record = wfdb.rdrecord(f'{PATH}/{record_name}')
    header = wfdb.rdheader(f'{PATH}/{record_name}')

    comments = header.comments
    ph = float([c for c in comments if 'pH' in c][0].replace('pH', '').strip())

    fhr, uc = record.p_signal[:,0], record.p_signal[:,1]

    return fhr, uc, ph
Example #22
def get_single_patiennce_data(
    patience,
    dataset_path='Data/StPeterburg/',
):
    PatienceData = {}
    for label in classify_label:
        PatienceData[label] = []
    sample_rate = wfdb.rdheader(dataset_path + patience).fs
    annotation = wfdb.rdann(dataset_path + patience, 'atr')
    all_middle_qrs_annotation = annotation.sample
    all_middle_qrs_label = annotation.symbol
    patience_signal, _ = wfdb.rdsamp(dataset_path + patience)
    patience_signal = np.asarray(patience_signal).T

    curent_signal_idx = 0
    curent_annotation_idx = 0

    while curent_signal_idx < len(patience_signal[0]):

        middle_qrs_of_signal = []
        middle_qrs_label_of_signal = []
        while curent_annotation_idx < len(
                all_middle_qrs_annotation
        ) and all_middle_qrs_annotation[
                curent_annotation_idx] <= curent_signal_idx + 10 * sample_rate:
            middle_qrs_of_signal.append(
                all_middle_qrs_annotation[curent_annotation_idx])
            middle_qrs_label_of_signal.append(
                all_middle_qrs_label[curent_annotation_idx])
            curent_annotation_idx += 1

        if not middle_qrs_of_signal:
            # skip 10-second windows that contain no annotated beats
            curent_signal_idx += 10 * sample_rate
            continue

        mean_interval = (middle_qrs_of_signal[-1] -
                         middle_qrs_of_signal[0]) // len(middle_qrs_of_signal)
        # print(middle_qrs_of_signal)
        # print(middle_qrs_label_of_signal)
        for idx, label in enumerate(middle_qrs_label_of_signal):
            if label in classify_label:
                PatienceData[label].append(
                    pad(
                        zero_one_scale(
                            patience_signal[:, middle_qrs_of_signal[idx]:
                                            middle_qrs_of_signal[idx] +
                                            int(1.2 * mean_interval)])))

        curent_signal_idx += 10 * sample_rate
    return PatienceData
Example #23
def extract_notes(infile):
    """
    extract_notes(infile)

    Fetch all notes in the MIMIC-III db for infile.
    
    TODO: Will need to build option to include only labs/notes in the period where there is waveform/numeric data
    but for now we include everything so it is available for context (eg echo reports)

    Parameters
    ----------

    infile: string
        filename of a wfdb file from the MIMIC3 matched dataset

    return: notes
        DataFrame containing notes, times, etc.
    """

    # get patient ID
    subj_id = patient_id_from_file(infile)

    #get lab_events for this patient
    con = open_db()

    query = \
    """
    SELECT i.chartdate, i.charttime, i.description, i.category, i.text
    FROM noteevents i
    WHERE subject_id = {};
    """.format(subj_id)

    notes = pd.read_sql_query(query, con)
    """ change time stamp to seconds from origin """

    origin = pd.to_datetime(wfdb.rdheader(infile).base_datetime)
    notes.insert(0, 'time', '')
    for idx, row in notes.iterrows():
        notes.loc[idx, 'time'] = int(
            (pd.to_datetime(row['charttime']) - origin).total_seconds())
    del notes['charttime']
    del notes['chartdate']

    return notes
Example #24
    def load_label(self,
                   rec: str,
                   ann: Optional[wfdb.Annotation] = None,
                   sampfrom: Optional[int] = None,
                   sampto: Optional[int] = None,
                   fmt: str = "a") -> str:
        """ finished, checked,

        load (classifying) label of the record,
        among the following three classes:
        "non atrial fibrillation",
        "paroxysmal atrial fibrillation",
        "persistent atrial fibrillation",

        Parameters
        ----------
        rec: str,
            name of the record
        ann: Annotation, optional,
            not used, to keep in accordance with other methods
        sampfrom: int, optional,
            not used, to keep in accordance with other methods
        sampto: int, optional,
            not used, to keep in accordance with other methods
        fmt: str, default "a",
            format of the label, case in-sensitive, can be one of:
            "f", "fullname": the full name of the label
            "a", "abbr", "abbrevation": abbreviation for the label
            "n", "num", "number": class number of the label (in accordance with the settings of the offical class map)

        Returns
        -------
        label: str,
            classifying label of the record
        """
        header = wfdb.rdheader(self._get_path(rec))
        label = header.comments[0]
        if fmt.lower() in ["a", "abbr", "abbreviation"]:
            label = self._labels_f2a[label]
        elif fmt.lower() in ["n", "num", "number"]:
            label = self._labels_f2n[label]
        elif fmt.lower() not in ["f", "fullname"]:
            raise ValueError(f"format `{fmt}` of labels is not supported!")
        return label
Example #25
    def show_table(self):
        # this many rows in the table
        self.timer.stop()
        self.bottom_layout.setCurrentIndex(4)
        rows = self.patient
        for row in range(0, rows):
            item = QTableWidgetItem(str(100 + row))
            self.patient_table.setItem(row, 0, item)
            head = wfdb.rdheader('MIT-BIH/mit-bih-database/' + str(100 + row))
            age, gender, _, _, _ = head.comments[0].split(" ")
            item = QTableWidgetItem(str(age))
            self.patient_table.setItem(row, 1, item)
            item = QTableWidgetItem(str(gender))
            self.patient_table.setItem(row, 2, item)
            drugs = head.comments[1]
            item = QTableWidgetItem(str(drugs))
            self.patient_table.setItem(row, 3, item)
            record = wfdb.rdann('MIT-BIH/mit-bih-database/' + str(100 + row),
                                "atr",
                                sampfrom=0,
                                sampto=650000)
            A, V, F, R, L = 0, 0, 0, 0, 0
            for index in record.symbol:
                if index == 'A':
                    A += 1
                if index == "V":
                    V += 1
                if index == "F":
                    F += 1
                if index == "R":
                    R += 1
                if index == "L":
                    L += 1
            item = QTableWidgetItem(str(A))
            self.patient_table.setItem(row, 4, item)
            item = QTableWidgetItem(str(V))
            self.patient_table.setItem(row, 5, item)
            item = QTableWidgetItem(str(F))
            self.patient_table.setItem(row, 6, item)
            item = QTableWidgetItem(str(R))
            self.patient_table.setItem(row, 7, item)
            item = QTableWidgetItem(str(L))
            self.patient_table.setItem(row, 8, item)
            self.patient_table.resizeColumnsToContents()
Example #26
    def _aggregate_stats(self) -> NoReturn:
        """ finished, checked,

        aggregate stats on the whole dataset
        """
        stats_file = "stats.csv"
        stats_file_fp = os.path.join(self.db_dir_base, stats_file)
        if os.path.isfile(stats_file_fp):
            self._stats = pd.read_csv(stats_file_fp)

        if self._stats.empty or set(self._stats_columns) != set(
                self._stats.columns):
            print(
                "Please wait patiently to let the reader aggregate statistics on the whole dataset..."
            )
            start = time.time()
            self._stats = pd.DataFrame(self.all_records, columns=[
                "record"
            ])  # use self.all_records to ensure it's computed
            self._stats["tranche"] = self._stats["record"].apply(
                lambda s: self._all_records_inv[s])
            self._stats["subject_id"] = self._stats["record"].apply(
                lambda s: int(s.split("_")[1]))
            self._stats["record_id"] = self._stats["record"].apply(
                lambda s: int(s.split("_")[2]))
            self._stats["label"] = self._stats["record"].apply(
                lambda s: self.load_label(s))
            self._stats["fs"] = self.fs
            self._stats["sig_len"] = self._stats["record"].apply(
                lambda s: wfdb.rdheader(self._get_path(s)).sig_len)
            self._stats[
                "sig_len_sec"] = self._stats["sig_len"] / self._stats["fs"]
            self._stats["revised"] = self._stats["record"].apply(
                lambda s: 1 if s in self.__revised_records else 0)
            self._stats = self._stats.sort_values(
                by=["subject_id", "record_id"], ignore_index=True)
            self._stats = self._stats[self._stats_columns]
            self._stats.to_csv(stats_file_fp, index=False)
            print(f"Done in {time.time() - start:.5f} seconds!")
        else:
            pass  # currently no need to parse the loaded csv file
        self.__all_records = self._stats["record"].tolist()
Example #27
    def testSingleEpoch(self, i):
        random.shuffle(self.testList)
        ers = []
        numcorrect = 0
        numHealthyCorrect = 0
        for rec in self.testList:
            #print('testing ' + rec)
            record = wfdb.rdheader('../ptbdb/' + rec)
            (sqerr, correct, healthyCorrect) = self.error(rec, record)
            ers.append(sqerr)
            if correct: numcorrect += 1
            if healthyCorrect: numHealthyCorrect += 1
        totalsqerr = math.sqrt(sum(ers))
        correctpercent = float(numcorrect) / len(self.testList)
        correctHealth = float(numHealthyCorrect) / len(self.testList)
        print('square mean error: %.4f' % totalsqerr)
        print('correct predictions: %.4f' % correctpercent)
        print('correct healthy prediction: %.4f' % correctHealth)
        self.testfile.write(
            str(i) + "," + str(totalsqerr) + "," + str(correctpercent) + "," +
            str(correctHealth) + "\n")
Example #28
    def load_ann(self,
                 rec: str,
                 sampfrom: Optional[int] = None,
                 sampto: Optional[int] = None,
                 fmt: str = "interval",
                 keep_original: bool = False) -> Union[Dict[str, list], np.ndarray]:
        """ finished, checked,

        load annotations (header) stored in the .hea files
        
        Parameters:
        -----------
        rec: str,
            name of the record
        sampfrom: int, optional,
            start index of the annotations to be loaded
        sampto: int, optional,
            end index of the annotations to be loaded
        fmt: str, default "interval", case insensitive,
            format of returned annotation, can also be "mask"
        keep_original: bool, default False,
            if True, in the "interval" `fmt`,
            intervals (in the form [a, b]) will stay the same as in the annotation file,
            otherwise `sampfrom` is subtracted if specified
        
        Returns:
        --------
        ann, dict or ndarray,
            the annotations in the format of intervals, or in the format of mask
        """
        fp = os.path.join(self.db_dir, rec)
        wfdb_ann = wfdb.rdann(fp, extension=self.ann_ext)
        header = wfdb.rdheader(fp)
        sig_len = header.sig_len
        sf = sampfrom or 0
        st = sampto or sig_len
        assert st > sf, "`sampto` should be greater than `sampfrom`!"

        ann = ED({k:[] for k in self.class_map.keys()})
        critical_points = wfdb_ann.sample.tolist() + [sig_len]
        aux_note = wfdb_ann.aux_note
        if aux_note[0] == "(N":
            # ref. the doc string of the class
            critical_points[0] = 0
        else:
            critical_points.insert(0, 0)
            aux_note.insert(0, "(N")

        for idx, rhythm in enumerate(aux_note):
            ann[rhythm.replace("(", "")].append([critical_points[idx], critical_points[idx+1]])
        ann = ED({
            k: generalized_intervals_intersection(l_itv, [[sf,st]]) \
                for k, l_itv in ann.items()
        })

        if fmt.lower() == "mask":
            tmp = deepcopy(ann)
            ann = np.full(shape=(st-sf,), fill_value=self.class_map.N, dtype=int)
            for rhythm, l_itv in tmp.items():
                for itv in l_itv:
                    ann[itv[0]-sf: itv[1]-sf] = self.class_map[rhythm]
        elif not keep_original:
            for k, l_itv in ann.items():
                ann[k] = [[itv[0]-sf, itv[1]-sf] for itv in l_itv]

        return ann
Example #29
def convert_mimic_matched(filename, samp_end=None, all_labs=True, all_notes=True):
    ''' 
    TODO:
    time filter
    additional demographics
    
    mapping function
    '''
    # all_labs - include labs from outside the time range of the specified signal file
    # all_notes - include notes from outside the time range of the specified signal file

    # samp_end - to limit size of datafile for testing, default = None
    
    # use base file, pull numerics

    if filename[-1] != 'n':
        print('Base is waveform file, add numerics')
    else:
        print('Base is numerics file, change basename to waveform and then process numerics')
        filename = filename[:-1]
    
    # generate output filename
    
    outfile = 'mimic_test.h5'
    
    # read header
    record = wfdb.rdheader(filename)
    
#    meta_head = wfdb_head_meta(filename)
    
    with h5py.File(outfile, 'w') as f:
#        meta = f.require_group('.meta')
#        meta.attrs['data'] = json.dumps(meta_head, indent = 4)
#        meta.attrs['mapping'] = json.dumps('Placeholder', indent = 4)
        
        
        grp_numerics = f.require_group('numerics')
        root = f['/']

        print('Converting numerics')
        record = wfdb.rdrecord(filename+'n', sampfrom = 0, sampto = samp_end )
        df = pd.DataFrame(data = record.p_signal, columns = record.sig_name)
        ds_num = grp_numerics.create_dataset('vitals', maxshape = (None,), data = df.to_records(index=False),
                                 compression="gzip", compression_opts=9, shuffle = True)
        
        grp_waveforms = f.require_group('/waveforms')
        print('Converting waveforms')
        record = wfdb.rdrecord(filename, sampfrom = 0, sampto = samp_end )
        df = pd.DataFrame(data = record.p_signal, columns = record.sig_name)
        ds_wave = grp_waveforms.create_dataset('hemodynamics', maxshape = (None,), data = df.to_records(index=False),
                                 compression="gzip", compression_opts=9, shuffle = True)
        
        grp_clinical = f.require_group('/clinical')

        #demographics

        print('Locating admission')
        (subj_id, hadm_id, age, gender, ethnicity, diagnosis, expired, death_time) = find_admission(filename)

        demographics = {
            'Age' : age,
            'Ethnicity' : ethnicity,
            'Gender'    : gender,
            'Expired'   : expired,
            'Death_time': death_time
        }
        
        root.attrs['demographics'] = json.dumps(demographics, indent = 4)
        grp_clinical.attrs['admit_diagnosis'] = json.dumps(diagnosis)  # add additional codes from diagnosis table
        
        #get additional diagnoses - save as dict
        dx_list = get_diagnoses(hadm_id)
        grp_clinical.attrs['diagnoses'] = json.dumps(dx_list, indent = 4)
        
#        reseach = f.require_group('/Research')

        
        #convert numerics
        #convert waveforms
        
    print('Extracting labs')
    labs = extract_labs(filename)
    write_labs(labs, outfile)

    print('Extracting notes')
    notes = extract_notes(filename)
    write_notes(notes, outfile)
Example #30
    def load_af_episodes(
            self,
            rec: str,
            ann: Optional[wfdb.Annotation] = None,
            sampfrom: Optional[int] = None,
            sampto: Optional[int] = None,
            zero_start: bool = False,
            fs: Optional[Real] = None,
            fmt: str = "intervals") -> Union[List[List[int]], np.ndarray]:
        """ finished, checked,

        load the episodes of atrial fibrillation, in terms of intervals or mask

        Parameters
        ----------
        rec: str,
            name of the record
        ann: Annotation, optional,
            the wfdb Annotation of the record,
            if None, corresponding annotation file will be read
        sampfrom: int, optional,
            start index of the data to be loaded,
            not used when `fmt` is "c_intervals"
        sampto: int, optional,
            end index of the data to be loaded,
            not used when `fmt` is "c_intervals"
        zero_start: bool, default False,
            if True, (relative) start index is zero,
            otherwise, (relative) start index is `sampfrom`,
            works only when `sampfrom` is positive and `fmt` is not "c_intervals"
        fs: real number, optional,
            if not None, positions of the loaded intervals or mask will be adjusted according to this sampling frequency
        fmt: str, default "intervals",
            format of the episodes of atrial fibrillation, can be one of "intervals", "mask", "c_intervals"

        Returns
        -------
        af_episodes: list or ndarray,
            episodes of atrial fibrillation, in terms of intervals or mask
        """
        header = wfdb.rdheader(self._get_path(rec))
        label = self._labels_f2a[header.comments[0]]
        siglen = header.sig_len
        # if ann is None or fmt.lower() in ["c_intervals",]:
        #     _ann = wfdb.rdann(self._get_path(rec), extension=self.ann_ext)
        # else:
        #     _ann = ann
        _ann = wfdb.rdann(self._get_path(rec), extension=self.ann_ext)
        sf, st = self._validate_samp_interval(rec, sampfrom, sampto)
        aux_note = np.array(_ann.aux_note)
        critical_points = _ann.sample
        af_start_inds = np.where((aux_note == "(AFIB")
                                 | (aux_note == "(AFL"))[0]  # ref. NOTE 3.
        af_end_inds = np.where(aux_note == "(N")[0]
        assert len(af_start_inds) == len(af_end_inds), \
            "unequal number of af period start indices and af period end indices"

        if fmt.lower() in [
                "c_intervals",
        ]:
            if sf > 0 or st < siglen:
                raise ValueError(
                    f"when `fmt` is `c_intervals`, `sampfrom` and `sampto` should never be used!"
                )
            af_episodes = [[start, end]
                           for start, end in zip(af_start_inds, af_end_inds)]
            return af_episodes

        intervals = []
        for start, end in zip(af_start_inds, af_end_inds):
            itv = [critical_points[start], critical_points[end]]
            intervals.append(itv)
        intervals = generalized_intervals_intersection(intervals, [[sf, st]])

        siglen = st - sf
        if fs is not None and fs != self.fs:
            siglen = self._round(siglen * fs / self.fs)
            sf = self._round(sf * fs / self.fs)
            if label == "AFf":
                # ref. NOTE. 1 of the class docstring
                # the `ann.sample` does not always satisfy this point after resampling
                intervals = [[sf, siglen - 1]]
            else:
                intervals = [[
                    self._round(itv[0] * fs / self.fs),
                    self._round(itv[1] * fs / self.fs)
                ] for itv in intervals]

        if zero_start:
            intervals = [[itv[0] - sf, itv[1] - sf] for itv in intervals]
            sf = 0
        af_episodes = intervals

        if fmt.lower() in [
                "mask",
        ]:
            mask = np.zeros((siglen, ), dtype=int)
            for itv in intervals:
                mask[itv[0] - sf:itv[1] - sf] = 1
            af_episodes = mask

        return af_episodes
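
The "intervals" and "mask" formats above are interconvertible; a minimal sketch of the interval-to-mask mapping the method performs (the numbers are arbitrary):

import numpy as np

siglen = 1000
intervals = [[200, 450], [700, 900]]  # arbitrary example AF episodes
mask = np.zeros((siglen,), dtype=int)
for start, end in intervals:
    mask[start:end] = 1  # samples inside an AF episode are flagged 1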