Example #1
0
def load_physio(feature_funcs):
    """Build a Bunch (data, c_target, r_target, DESCR) from the physio files.

    Each row of ``data`` holds ``len(feature_funcs)`` statistics per
    physiological variable; ``c_target`` is mortality (0/1) and
    ``r_target`` is length of stay in whole days.
    """
    description = """ Loads a dataset containing physiological data. 

    It returns a Bunch with the following attributes:
    - data: n_samples by n_features 2D array. There are 7890 samples
    (patient episodes), and 52 features (mean, standard deviation, max
    and min for each of 13 physiological variables)
    - c_target: target variable to be used for classification: mortality.
    0 indicates survival, 1 indicates death.
    - r_target: target variable to be used for regression: length of stay 
    in days.
    - DESCR: a description of this dataset (for now, the docstring of 
    this function)

    """
    _, episodes = datafiles.load_episodes()
    _, outcomes = datafiles.load_outcomes()  # headers are not needed here

    # One row per episode; one column per (variable, feature function) pair.
    n_funcs = len(feature_funcs)
    data = np.zeros((len(episodes), n_funcs * datafiles.NUM_VARS))

    for row, (_episode_id, episode_data) in enumerate(episodes):
        # Fill this sample's row, one variable-sized slice at a time.
        for var in range(datafiles.NUM_VARS):
            lo = var * n_funcs
            data[row, lo:lo + n_funcs] = _compute_features(episode_data[var],
                                                           feature_funcs)

    # Targets come from the outcomes file: mortality for classification,
    # length of stay (stored in seconds) for regression.
    _episode_ids, episode_outcomes = unzip(outcomes)
    los, mortality, _med_los = map(np.asarray, unzip(episode_outcomes))

    return Bunch(data=data,
                 c_target=mortality,
                 r_target=np.floor(los / (24 * 60 * 60)),  # secs -> days
                 DESCR=description)
Example #2
0
def load_physio(feature_funcs):
    # The description text is delivered to callers through the Bunch's
    # DESCR attribute, hence it lives in a variable rather than a docstring.
    description = """ Loads a dataset containing physiological data. 

    It returns a Bunch with the following attributes:
    - data: n_samples by n_features 2D array. There are 7890 samples
    (patient episodes), and 52 features (mean, standard deviation, max
    and min for each of 13 physiological variables)
    - c_target: target variable to be used for classification: mortality.
    0 indicates survival, 1 indicates death.
    - r_target: target variable to be used for regression: length of stay 
    in days.
    - DESCR: a description of this dataset (for now, the docstring of 
    this function)

    """
    headers, episodes = datafiles.load_episodes()
    headers, outcomes = datafiles.load_outcomes()  # headers goes unused

    # Feature matrix: one row per episode, a group of columns per variable.
    num_funcs = len(feature_funcs)
    feature_matrix = np.zeros((len(episodes),
                               num_funcs * datafiles.NUM_VARS))

    for sample_idx, (eid, measurements) in enumerate(episodes):
        col = 0
        for v in range(datafiles.NUM_VARS):
            # Write this variable's feature group, then advance the cursor.
            feature_matrix[sample_idx, col:col + num_funcs] = \
                _compute_features(measurements[v], feature_funcs)
            col += num_funcs

    # Unpack outcomes into per-episode target arrays.
    ids, outcome_tuples = unzip(outcomes)
    los, mortality, med_los = map(np.asarray, unzip(outcome_tuples))

    # Classification target: mortality flag (0 = survived, 1 = died).
    classification_target = mortality

    # Regression target: length of stay, converted from seconds to days.
    seconds_per_day = 24 * 60 * 60
    regression_target = np.floor(los / seconds_per_day)

    return Bunch(data=feature_matrix,
                 r_target=regression_target,
                 c_target=classification_target,
                 DESCR=description)
Example #3
0
def _compute_features(measurements, functions):
    """ Compute basic features for a time series of measurements """
    features = np.zeros(len(functions))
    if measurements:
        times, values = map(np.asarray, unzip(measurements))
        for i, f in enumerate(functions):
            features[i] = f(values, times)
    return features
Example #4
0
def _compute_features(measurements, functions):
    """ Compute basic features for a time series of measurements """
    features = np.zeros(len(functions))
    if measurements:
        times, values = map(np.asarray, unzip(measurements))
        for i, f in enumerate(functions):
            features[i] = f(values, times)
    return features
Example #5
0
    testing_data = data[training_size:training_size + testing_size]
    return training_data, testing_data


# Fetch the train/test split (60k train, 10k test samples) and the model's
# entry points (evaluate/train/save closures from build_mnist_model).
training_data, testing_data = get_mnist_data(60000, 10000)
evaluate, train, save = build_mnist_model()

# printer: progress display (0.1 is presumably a refresh interval — confirm
# against the Printer class); encoder: one-hot targets for 10 digit classes.
printer = Printer(0.1)
encoder = OneHotEncoder(10)

iterations = 5
batch_sizes = 5
for itr in range(iterations):
    # Preparing the training data
    # Reshuffle every epoch so mini-batches differ between iterations.
    np.random.shuffle(training_data)
    inputs, targets = unzip(training_data)
    # NOTE(review): `len(inputs) / batch_sizes` relies on Python 2 integer
    # division (this file uses the py2 `print` statement); under Python 3 it
    # would be a float and np.array_split would raise — confirm interpreter.
    input_batches = np.array_split(np.asarray(inputs),
                                   len(inputs) / batch_sizes)
    target_batches = np.array_split(np.asarray(targets),
                                    len(targets) / batch_sizes)

    if itr == 0:
        print 'training with', len(input_batches), 'batches of size', len(
            input_batches[0])
        # Total mini-batch steps over all epochs (misspelled in the original;
        # kept as-is for consistency with its uses).
        total_iteartions = len(input_batches) * iterations

    for i, (input_batch,
            target_batch) in enumerate(zip(input_batches, target_batches)):
        # Encode integer labels as one-hot vectors before training.
        target_batch = encoder.encode(target_batch)
        # The meaning of the literal 4 is not visible here — presumably a
        # step/epoch count taken by the train closure; TODO confirm.
        error = train(input_batch, target_batch, 4)
        current_iteration = itr * len(input_batches) + i
    training_data = data[0:training_size]
    testing_data = data[training_size:training_size+testing_size]
    return training_data, testing_data

# Fetch the train/test split (60k train, 10k test samples) and the model's
# entry points (evaluate/train/save closures from build_mnist_model).
training_data, testing_data = get_mnist_data(60000, 10000)
evaluate, train, save = build_mnist_model()

# printer: progress display (0.1 is presumably a refresh interval — confirm
# against the Printer class); encoder: one-hot targets for 10 digit classes.
printer = Printer(0.1)
encoder = OneHotEncoder(10)

iterations = 5
batch_sizes = 5
for itr in range(iterations):
    # Preparing the training data
    # Reshuffle every epoch so mini-batches differ between iterations.
    np.random.shuffle(training_data)
    inputs, targets = unzip(training_data)
    # NOTE(review): `len(inputs) / batch_sizes` relies on Python 2 integer
    # division (this file uses the py2 `print` statement); under Python 3 it
    # would be a float and np.array_split would raise — confirm interpreter.
    input_batches = np.array_split(np.asarray(inputs), len(inputs) / batch_sizes)
    target_batches = np.array_split(np.asarray(targets), len(targets) / batch_sizes)

    if itr == 0:
        print 'training with', len(input_batches), 'batches of size', len(input_batches[0])
        # Total mini-batch steps over all epochs; the variable name is
        # misspelled ('iteartions') but used consistently below, so it works.
        total_iteartions = len(input_batches) * iterations

    for i, (input_batch, target_batch) in enumerate(zip(input_batches, target_batches)):
        # Encode integer labels as one-hot vectors before training.
        target_batch = encoder.encode(target_batch)
        # The meaning of the literal 4 is not visible here — presumably a
        # step/epoch count taken by the train closure; TODO confirm.
        error = train(input_batch, target_batch, 4)
        current_iteration = itr*len(input_batches) + i
        # Overwrite the progress line in place with percent complete + error.
        printer.overwrite('training ' + str(int(current_iteration * 100. / total_iteartions)) + '% - error:' + str(error))

# Remove the progress line once training finishes.
printer.clear()