def parse_csv(filename, hdim):
    """Parses two-channel ECG data from a csv file.

    The file is loaded with pandas and its rows are consumed in pairs: a row
    at an even index whose second column is 0, followed by a row at an odd
    index whose second column is 1. Columns from the fourth one onwards are
    treated as the signal samples of that channel.

    Args:
      filename: Path of the csv file. Its header must contain the columns
        'input channel ' (with the trailing space) and 'label'.
      hdim: Number of columns of the all-zero hidden-state array stored in
        every returned sequence.

    Returns:
      A tuple `(seqs, labels)`. `seqs` is a list with one
      `lds.LinearDynamicalSystemSequence` per row pair, built from all-zero
      inputs of shape (seq_len, 1), all-zero hidden states of shape
      (seq_len, hdim) and the float32 signal selected by `FLAGS.channel`.
      `labels` is the array of `label` values of the rows whose
      'input channel ' column equals 0.

    Raises:
      ValueError: If `FLAGS.channel` is not 'both', '1' or '2'.
      AssertionError: If the rows do not alternate between channel 0 and
        channel 1, or if valid samples appear after the first NaN sample.
    """
    df = pd.read_csv(filename)
    labels = df[df['input channel '] == 0].label.values
    seqs = []
    for i, row in df.iterrows():
        # Rows are addressed by position with `.iloc`: plain `row[1]` on a
        # Series indexed by column names relies on a positional fallback
        # that pandas has deprecated.
        channel_id = row.iloc[1]
        if i % 2:
            # Odd rows were already consumed together with the preceding row.
            assert channel_id == 1, 'Odd rows must hold channel 1.'
            continue
        assert channel_id == 0, 'Even rows must hold channel 0.'
        next_row = df.iloc[i + 1]
        ch1_vals = row.iloc[3:].values.astype(np.float32).reshape(-1, 1)
        ch2_vals = next_row.iloc[3:].values.astype(np.float32).reshape(-1, 1)
        if FLAGS.channel == 'both':
            values = np.concatenate([ch1_vals, ch2_vals], axis=1)
        elif FLAGS.channel == '1':
            values = ch1_vals
        elif FLAGS.channel == '2':
            values = ch2_vals
        else:
            raise ValueError('Channel flag expected to be both, 1, or 2.')
        seq_len = values.shape[0]
        # Shorter recordings are padded with NaN; cut the sequence at the
        # first time step that contains a NaN in any selected channel.
        nan_rows = np.isnan(values).any(axis=1)
        if nan_rows.any():
            seq_len = np.min(np.where(nan_rows))
            assert np.isnan(values[seq_len:]).all(), (
                'Found valid samples after the first NaN sample.')
        values = values[:seq_len]
        seqs.append(
            lds.LinearDynamicalSystemSequence(
                np.zeros((seq_len, 1)), np.zeros((seq_len, hdim)), values))
    return seqs, labels
def parse_csv(filename, hdim):
    """Reads two-channel ECG data from a csv file.

    Every row is expected to hold `key, channel, label, sample, sample, ...`.
    A row with channel '0' is buffered until the following row with channel
    '1' arrives; the two rows are then combined into one sequence. Empty
    sample fields are read as NaN.

    Args:
      filename: Path of the csv file (no header row is skipped).
      hdim: Number of columns of the all-zero hidden-state array stored in
        every returned sequence.

    Returns:
      A tuple `(seqs, labels)`: a list of `lds.LinearDynamicalSystemSequence`
      objects and the list of their label strings, after being passed through
      `filter_type` (only when `FLAGS.filter_type` is set) and
      `drop_infreq_labels`.

    Raises:
      ValueError: If `FLAGS.channel` is not 'both', '0' or '1'.
      AssertionError: If channel '0' and channel '1' rows do not alternate,
        or if the rows of a pair disagree in length, label, or the first two
        ':'-separated components of their key.
    """
    labels = []
    seqs = []
    # State carried over from a channel '0' row to its channel '1' partner.
    unprocessed_key = None
    unprocessed_label = None
    unprocessed_ch0 = None
    # Number of rows (not sequences) that had at least one empty sample.
    not_full_length = 0
    # csv.reader requires a text-mode file opened with newline=''. Opening
    # in binary mode makes the reader fail on Python 3 because it receives
    # bytes instead of strings.
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            key = row[0]
            channel = row[1]
            label = row[2]
            channel_signal = np.array(row[3:]).reshape(-1, 1)
            try:
                channel_signal = channel_signal.astype(np.float32)
            except ValueError:
                # The bulk conversion fails on empty fields; convert field by
                # field and mark the missing samples with NaN.
                # NOTE(review): this fallback yields float64 whereas complete
                # rows are float32 -- confirm that the mix is intended.
                channel_signal = np.array([
                    float(x) if x else np.nan for x in row[3:]
                ]).reshape(-1, 1)
                not_full_length += 1
            if channel == '0':
                assert unprocessed_ch0 is None
                unprocessed_ch0 = channel_signal
                unprocessed_key = key
                unprocessed_label = label
            if channel == '1':
                assert unprocessed_ch0 is not None
                seq_len = len(channel_signal)
                assert len(unprocessed_ch0) == seq_len
                if FLAGS.channel == 'both':
                    vals = np.concatenate(
                        [unprocessed_ch0, channel_signal], axis=1)
                elif FLAGS.channel == '0':
                    vals = unprocessed_ch0
                elif FLAGS.channel == '1':
                    vals = channel_signal
                else:
                    raise ValueError(
                        'Unexpected FLAGS.channel value: %s' % FLAGS.channel)
                seqs.append(
                    lds.LinearDynamicalSystemSequence(
                        np.zeros((seq_len, 1)),
                        np.zeros((seq_len, hdim)),
                        vals))
                assert label == unprocessed_label
                assert key.split(':')[:2] == unprocessed_key.split(':')[:2]
                labels.append(label)
                # The pair is complete; reset the buffer for the next one.
                unprocessed_label = None
                unprocessed_key = None
                unprocessed_ch0 = None
    logging.info('Total seqs: %d, partial length seqs: %d.', len(seqs),
                 not_full_length)
    if FLAGS.filter_type:
        seqs, labels = filter_type(seqs, labels)
    seqs, labels = drop_infreq_labels(seqs, labels)
    return seqs, labels
def subsample(sequences, step_size=5):
    """Builds subsampled copies of the given sequences.

    Args:
      sequences: Iterable of objects exposing `inputs`, `hidden_states` and
        `outputs` attributes.
      step_size: Step forwarded to `_subsample_rows` for each of the three
        attributes.

    Returns:
      A new list holding one `lds.LinearDynamicalSystemSequence` per element
      of `sequences`, in the same order, constructed from the results of
      `_subsample_rows` on its inputs, hidden states and outputs.
    """

    def _thin(seq):
        # Process the three arrays in the order the constructor takes them.
        inputs = _subsample_rows(seq.inputs, step_size)
        hidden_states = _subsample_rows(seq.hidden_states, step_size)
        outputs = _subsample_rows(seq.outputs, step_size)
        return lds.LinearDynamicalSystemSequence(inputs, hidden_states, outputs)

    return [_thin(seq) for seq in sequences]