Python files_builder 예제들, data.data_files.files_builder Python 예제들

예제 #1

0

파일 보기

def main(kind, file):
    """Plot one EEG file interactively, or every file of a given kind.

    Sets up INFO-level logging and restricts MNE's own logging to errors.

    Args:
        kind: Data kind forwarded to ``files_builder`` when no single file
            is requested.
        file: Path of a single file to plot. An empty string means "plot
            all files of ``kind``".
    """
    logger = logging.getLogger(__name__)
    logging.basicConfig(level=logging.INFO)
    mne.set_log_level(logging.ERROR)

    # Compare by value. `is not ''` tests object identity, which is not
    # guaranteed for equal strings and is a SyntaxWarning on Python 3.8+.
    if file != '':
        _, ext = os.path.splitext(file)
        file = files_builder(ext=ext).single_file(file)
        interactive_plot(file)
        return

    logger.info(f'Plotting EEG singals of kind {kind}.')

    for file in files_builder(kind):
        interactive_plot(file)

예제 #2

0

파일 보기

파일: non_linear.py 프로젝트: mirgee/thesis_project

def create_training_data(output_path,
                         kind,
                         window=None,
                         minl=0,
                         maxl=None,
                         existing_df=None):
    """Fill a (patient, trial) x (channel, measure) table and pickle it.

    For every file produced by ``files_builder(kind)`` whose row still
    contains a null value, the row is computed with ``compute_nl`` and the
    whole table is written to ``output_path``. Rows without nulls are
    skipped, so passing ``existing_df`` resumes an earlier run.

    Args:
        output_path: Destination of the pickled dataframe.
        kind: Data kind forwarded to ``files_builder``.
        window: Forwarded to ``compute_nl``; when not ``None`` the index
            gets an extra ``window`` level.
        minl: Forwarded to ``compute_nl``.
        maxl: Forwarded to ``compute_nl``.
        existing_df: Dataframe to continue filling instead of a new one.
    """
    logging.info('Creating training data.')

    columns = pd.MultiIndex.from_product(
        [CHANNEL_NAMES, algos.measure_names],
        names=['channel', 'measure'])

    patients = list(range(1, 134))
    trials = ['a', 'b']
    if window is None:
        index = pd.MultiIndex.from_product(
            [patients, trials],
            names=['patient', 'trial'])
    else:
        index = pd.MultiIndex.from_product(
            [patients, trials, [0]],
            names=['patient', 'trial', 'window'])

    if existing_df is None:
        main_df = pd.DataFrame(columns=columns, index=index)
    else:
        main_df = existing_df

    for file in files_builder(kind):
        row_key = (file.id, file.trial)

        # Nothing to do when the row is already complete.
        if not main_df.loc[row_key].isnull().values.any():
            logging.debug(f"Skipping row ({file.id}, {file.trial})")
            continue

        new_row = compute_nl(file.df, window, minl, maxl)
        main_df.loc[row_key] = pd.Series(new_row)
        logging.debug("New row: \n%s" % new_row)
        logging.debug(f'Saving training data at {output_path}.')
        logging.info(f'Processed file {file.number}')
        # Persist after every file so that progress survives interruption.
        main_df.to_pickle(output_path)

예제 #3

0

파일 보기

파일: surrogates.py 프로젝트: mirgee/thesis_project

def create_surrogates():
    """Write a tab-separated surrogate CSV for every processed file.

    Each column of the file's dataframe is replaced by the first element
    of the result of ``iaaft`` for that column; the output is stored under
    ``SURROGATE_ROOT`` using the source file's name with a ``.csv``
    extension.
    """
    for file in files_builder(DataKind.PROCESSED):
        source = file.df
        # Empty frame with the same index and columns as the source.
        surrogate = pd.DataFrame().reindex_like(source)
        for column in source.columns:
            generated = iaaft(
                source[column], maxiter=1000, atol=1e-8, rtol=1e-10)
            surrogate[column] = generated[0]

        stem, _ = os.path.splitext(file.name)
        target = os.path.join(SURROGATE_ROOT, stem + '.csv')
        surrogate.to_csv(target, sep='\t')

예제 #4

0

파일 보기

파일: preprocess.py 프로젝트: mirgee/thesis_project

def preprocess_all(output_file=PROCESSED_ROOT):
    """Preprocess every raw file and save the result as ``.fif``.

    Files for which ``preprocess_raw_mne_file`` raises ``ValueError`` are
    skipped.

    Args:
        output_file: Directory in which the processed files are saved.

    Returns:
        The data object from the final iteration: the preprocessed data of
        the last file, or its unprocessed data if that file was skipped.
        ``None`` when there are no raw files at all.
    """
    # Bound up front so an empty file set returns None instead of raising
    # UnboundLocalError at the return statement.
    mne_raw_data = None

    for file in files_builder(DataKind.RAW):
        mne_raw_data = files_builder(DataKind.MNE, file=file)

        try:
            mne_raw_data = preprocess_raw_mne_file(mne_raw_data)
        except ValueError:
            # Raised when duration is < 60 s, we may safely skip the file.
            # The message must be a single string: a second positional
            # argument is treated as a %-format argument and makes the log
            # record fail to format.
            logging.debug(f'Skipping file {file.name} because of '
                          'insufficient duration.')
            continue

        processed_file_name = os.path.splitext(file.name)[0] + '.fif'
        mne_raw_data.save(os.path.join(output_file, processed_file_name),
                          proj=False,
                          overwrite=True)

    return mne_raw_data

예제 #5

0

파일 보기

def compute_nl_measure(model_path, measure='lyap'):
    """Compute a registered measure over each unit's input range.

    Loads the model at ``model_path``, looks up the registered algorithm
    whose ``algo_name`` equals ``measure`` and, for each processed file,
    channel, layer and unit, applies it to the slice of the preprocessed
    channel given by ``get_rf_start_end``. The table is pickled after each
    file.

    Args:
        model_path: Path of the model to load.
        measure: Name of the registered algorithm to apply.

    Raises:
        Exception: If no registered algorithm is named ``measure``.
    """
    idxs = pd.MultiIndex.from_product([list(range(1, 134)), ['a', 'b']],
                                      names=['patient', 'trial'])
    # 'channel' and 'filter' are added because the dtype mapping and the
    # writes below refer to them; a dtype key that is not a column makes
    # DataFrame.astype raise KeyError.
    cols = ['channel_n', 'feature', 'value', 'layer', 'unit', 'trial',
            'channel', 'filter']
    df = pd.DataFrame(index=idxs, columns=cols)
    # The frame starts out entirely NaN, and NaN cannot be cast to int, so
    # only the NaN-tolerant casts are applied up front.
    df = df.astype({
        'channel': str,
        'value': float,
    })

    model = k.load_model(model_path)
    sizes, strides = receptive_field_sizes_and_strides(model)

    # contain model name, feature
    # NOTE(review): split(model_path)[0] is the directory part of the path,
    # not the file name - confirm whether [1] was intended.
    model_name, _ = splitext(split(model_path)[0])
    measures_path = join(CORRS_ROOT, '_'.join((model_name, measure)))

    for alg in algos.registered_algos:
        # Match against the requested measure, not the literal 'measure'.
        if alg.algo_name == measure:
            mem_alg = Memoize(alg)
            break
    else:
        raise Exception(f'Algorithm {measure} not registered.')

    for file in files_builder(DataKind.PROCESSED):
        idx = file.id
        trial = file.trial
        # Do the same TS processing we did to construct the model input
        logging.info(f'Trial {idx}-{trial}...')
        for channel in CHANNEL_NAMES:
            data = preprocess(file.df[channel])
            # Compute values for each start end layer
            logging.info(f'Channel {channel}...')
            for layer_n, layer in enumerate(model.layers):
                shape = layer.shape
                units = range(shape[1])
                # Both fragments need the f prefix to be interpolated.
                logging.info(f'Layer {layer_n} with {shape[-1]+1} '
                             f'filters, each {shape[1]+1} units...')
                # TODO: iterate over filters (range(shape[-1])) as well and
                # record the filter number.
                for unit_n in units:
                    start, end = get_rf_start_end(sizes, strides, unit_n,
                                                  layer_n)
                    value = mem_alg(data, start, end)
                    # NOTE(review): every write targets the same
                    # (idx, trial) row, so only the last channel/layer/unit
                    # combination survives - confirm the intended layout.
                    df.loc[(idx, trial), 'channel'] = channel
                    df.loc[(idx, trial), 'value'] = value
                    df.loc[(idx, trial), 'layer'] = layer_n
                    df.loc[(idx, trial), 'unit'] = unit_n

        df.to_pickle(measures_path)

예제 #6

0

파일 보기

def interactive_plot(file):
    """Show a blocking, interactive plot of all channels in ``file``.

    The file is loaded through ``files_builder(DataKind.MNE, file=file)``
    and plotted. For files of kind ``DataKind.RAW`` the data is then
    preprocessed and plotted a second time.
    """
    def show(recording):
        # block=True keeps the window open until the user closes it.
        figure = recording.plot(block=True, scalings='auto')
        figure.set_size_inches(18.5, 10.5, forward=True)

    data = files_builder(DataKind.MNE, file=file)
    show(data)

    sfreq = data.info["sfreq"]
    logging.info(f'Plotting file {file.name} of kind={file.kind}, '
                 f'sfreq={sfreq}.')

    # Only raw recordings get the additional processed view.
    if file.kind != DataKind.RAW:
        return

    logging.info(f'Plotting processed file {file.name}.')
    show(preprocess_raw_mne_file(data))

예제 #7

0

파일 보기

def compute_correlations(model_path, measures_path):
    """
    Compute pairs of (measure value, unit activation) for each unit in each
    layer, compute their correlation for each unit in each layer.

    NOTE: unfinished - the pairs are not collected yet and nothing is
    returned (see the TODO in the loop).

    Args:
        model_path: Path passed to ``rebuild_model`` for every channel.
        measures_path: Path of the pickled measures dataframe.
    """
    # Load through pandas. The original called a method on `df` before it
    # was assigned, and the loaded frame was then overwritten by the empty
    # result frame created below.
    measures_df = pd.read_pickle(measures_path)  # noqa: F841 (used by TODO)
    # We will have to use 9*2*num_units... Max num of units is
    # 5000 -> 9*2*4*5000*4 = 1.44 GB :/
    # from_arrays builds the same empty 4-level index without the `labels`
    # keyword, which was removed in pandas 1.0.
    idx = pd.MultiIndex.from_arrays([[]] * 4,
                                    names=['l', 'h', 'w', 'f'])
    cols = ['activation', 'value']
    df = pd.DataFrame(index=idx, columns=cols)

    for file in files_builder(DataKind.DIRECT):
        for channel_n, channel in enumerate(CHANNEL_NAMES):
            # TODO Compare each channel's value with the activation,
            # save the pair
            model = rebuild_model(model_path, channel_n)
            out = model(file.df[channel])

예제 #8

0

파일 보기

파일: sigma.py 프로젝트: mirgee/thesis_project

def create_sigma_pkl(in_df, kind, output_path):
    """Compute a sigma value per (patient, trial, channel, measure).

    For every measure in the second column level of ``in_df`` that has a
    registered algorithm, and for every row of ``in_df``, the matching file
    is looked up and ``compute_sigma`` is called for each channel. The
    table is pickled to ``output_path`` after every computed value.

    Args:
        in_df: Dataframe indexed by (patient, trial, ...) with
            (channel, measure) columns.
        kind: Value convertible to ``DataKind``, selecting the files.
        output_path: Destination of the pickled result.
    """
    cols = pd.MultiIndex.from_product([CHANNEL_NAMES, algos.measure_names],
                                      names=['channel', 'measure'])
    idxs = pd.MultiIndex.from_product([list(range(1, 134)), ['a', 'b']],
                                      names=['patient', 'trial'])
    main_df = pd.DataFrame(columns=cols, index=idxs)
    for measure_name in in_df.columns.levels[1]:
        for algo in algos.registered_algos:
            if measure_name == algo.algo_name:
                func = algo
                break
        else:
            logging.warning(
                f'Algorithm for measure {measure_name} not regitered, skipping'
            )
            continue

        # Only the index keys are needed, so iterate over the index instead
        # of materialising every row with iterrows().
        for key in in_df.index:
            index = key[0]
            trial = key[1]
            file = files_builder(DataKind(kind)).from_index_trial(index, trial)
            assert ((file.id, file.trial) == (index, trial))
            for channel_name in CHANNEL_NAMES:
                # NOTE(review): this selects all measures of the channel,
                # not only `measure_name` - confirm what compute_sigma
                # expects as the true statistic.
                true_stat = in_df.loc[(file.id, file.trial), channel_name]
                try:
                    # TODO: Choose if the time series is to be shortened
                    time_series = file.df.loc[:, channel_name]
                except IndexError:
                    logging.info('Caught index error, skipping...')
                    break
                sigma = compute_sigma(time_series, true_stat, func)
                new_row = {(channel_name, measure_name): sigma}
                # Write the single cell. Assigning a one-entry Series to
                # the whole row realigns it against every column and resets
                # all previously computed values in that row to NaN.
                main_df.loc[(file.id, file.trial),
                            (channel_name, measure_name)] = sigma
                logging.debug("New row: \n%s" % new_row)
                logging.debug(f'Saving training data at {output_path}.')
                logging.info(f'Processed file {file.number}')
                main_df.to_pickle(output_path)

예제 #9

0

파일 보기

파일: meta_info.py 프로젝트: mirgee/thesis_project

def create_meta_df(output_path):
    """Build and pickle a per-(patient, trial) table of meta information.

    For every processed file the matching row of the META dataframe is
    copied into a (patient, trial)-indexed table, together with three-class
    labels (-1, 0, 1) obtained by comparing scores with the 0.33 and 0.66
    quantiles of the META columns. If ``training.pickle`` exists under
    ``LABELED_ROOT/all``, it is joined with the new table and saved as
    ``measures_w_meta.pkl`` next to ``output_path``.

    Args:
        output_path: Destination of the pickled meta dataframe.
    """
    def _class_bounds(col_meta):
        # Return the (0.33, 0.66) quantiles of one META column, or of two
        # columns pooled together when a tuple of names is given.
        if isinstance(col_meta, tuple):
            # pd.concat replaces Series.append, removed in pandas 2.0.
            pooled = pd.concat([meta_df[col_meta[0]], meta_df[col_meta[1]]])
            return np.quantile(pooled, 0.33), np.quantile(pooled, 0.66)
        # Quantile of the single column only, not of the whole frame.
        scores = meta_df[col_meta]
        return scores.quantile(0.33), scores.quantile(0.66)

    def _three_class_label(score, bounds):
        # Map a score to -1 / 0 / 1 according to the two bounds.
        q1, q2 = bounds
        if score <= q1:
            return -1
        if score <= q2:
            return 0
        return 1

    logging.info('Creating dataframe with the meta information.')
    cols = [
        'resp', 'b/a', 'sex', 'age', 'sfreq', 'sc', 'sc_bef', 'sc_aft', 'dep',
        'dep_bef', 'dep_aft', 'change'
    ]
    idxs = pd.MultiIndex.from_product([list(range(1, 134)), ['a', 'b']],
                                      names=['patient', 'trial'])
    extra_df = pd.DataFrame(columns=cols, index=idxs)
    meta_df = files_builder(DataKind.META)

    # The bounds depend only on meta_df, so compute them once rather than
    # for every file.
    pooled_bounds = _class_bounds(('M_1', 'M_4'))
    before_bounds = _class_bounds('M_1')
    after_bounds = _class_bounds('M_4')

    for file in files_builder(DataKind.PROCESSED):
        index = file.id
        trial = file.trial
        meta_row = meta_df.loc[index, :]
        m1 = meta_row['M_1']
        m4 = meta_row['M_4']
        score = m1 if trial == 'a' else m4
        row = {
            'resp': meta_row['RESP_4W'],
            'b/a': 0 if trial == 'a' else 1,
            'age': meta_row['AGE'],
            'sex': meta_row['SEX'],
            'sfreq': meta_row['freq'],
            'sc': score,
            'sc_bef': m1,
            'sc_aft': m4,
            'change': m1 / m4,
            'dep': _three_class_label(score, pooled_bounds),
            'dep_bef': _three_class_label(m1, before_bounds),
            'dep_aft': _three_class_label(m4, after_bounds),
        }
        # Use a single .loc call per cell. Chained `df.loc[key][col] = v`
        # may assign to a temporary copy and leave extra_df unchanged.
        for column, value in row.items():
            extra_df.loc[(index, trial), column] = value
        logging.debug('Added row: \n{}'.format(extra_df.loc[(index, trial)]))

    # NOTE(review): the integer casts fail if any (patient, trial) row was
    # left unfilled (NaN) - confirm that every index entry has a file.
    extra_df = extra_df.astype({
        'resp': 'category',
        'b/a': 'category',
        'sex': 'category',
        'sfreq': int,
        'age': int,
        'sc': float,
        'sc_bef': int,
        'sc_aft': int,
        'dep': int,
        'dep_bef': int,
        'dep_aft': int,
        'change': float
    })
    logging.debug('The resulting data: \n{}'.format(extra_df.describe()))
    logging.debug(f'Saving metadata dataframe at {output_path}.')
    extra_df.to_pickle(output_path)

    output_folder = os.path.dirname(output_path)
    measures_file = os.path.join(LABELED_ROOT, 'all', 'training.pickle')
    if os.path.isfile(measures_file):
        measures_df = pd.read_pickle(measures_file)
        joined_df = measures_df.join(extra_df)
        joined_path = os.path.join(output_folder, 'measures_w_meta.pkl')
        logging.debug(f'Saving joined dataframe at {joined_path}:\n'
                      f'{joined_df}')
        joined_df.to_pickle(joined_path)