def parse_csv(filename, hdim):
    """Parses ECG data from csv file.

    The file is loaded with `pd.read_csv` and its rows are consumed in pairs:
    a row whose second column is 0, immediately followed by a row whose second
    column is 1. Signal samples are read from the fourth column onwards, and a
    sequence is cut at the first time step that contains a NaN.

    NOTE(review): another `parse_csv` is defined later in this file and
    replaces this one once the module is loaded -- confirm whether this
    version is still needed.

    Args:
        filename: Path handed to `pd.read_csv`.
        hdim: Column count of the all-zero array passed as the second
            argument of each `lds.LinearDynamicalSystemSequence`.

    Returns:
        A tuple `(seqs, labels)`. `seqs` holds one
        `lds.LinearDynamicalSystemSequence` per row pair; `labels` holds the
        `label` column of the rows whose `'input channel '` column equals 0.

    Raises:
        ValueError: If `FLAGS.channel` is not 'both', '1' or '2'.
        AssertionError: If the rows do not alternate between channel 0 and
            channel 1, or if a non-NaN sample follows a NaN one.
    """
    df = pd.read_csv(filename)
    labels = df[df['input channel '] == 0].label.values
    seqs = []
    # The parity test on `i` and the `df.iloc[i + 1]` lookup both rely on the
    # index being 0, 1, 2, ... (what `read_csv` produces when no index column
    # is requested).
    for i, row in df.iterrows():
        # `row` is indexed by column name, so positions are addressed through
        # `.iloc`; plain integer keys on such a Series are deprecated.
        channel_id = row.iloc[1]
        if i % 2:
            # Odd rows are the second channel; they are consumed together
            # with the preceding even row below.
            assert channel_id == 1
            continue
        assert channel_id == 0
        ch1_vals = row.iloc[3:].values.astype(np.float32).reshape(-1, 1)
        ch2_vals = (
            df.iloc[i + 1].iloc[3:].values.astype(np.float32).reshape(-1, 1))
        if FLAGS.channel == 'both':
            values = np.concatenate([ch1_vals, ch2_vals], axis=1)
        elif FLAGS.channel == '1':
            values = ch1_vals
        elif FLAGS.channel == '2':
            values = ch2_vals
        else:
            raise ValueError('Channel flag expected to be both, 1, or 2.')
        seq_len = values.shape[0]
        nan_steps = np.isnan(values).any(axis=1)
        if nan_steps.any():
            # Keep only the prefix before the first time step with a NaN.
            seq_len = np.flatnonzero(nan_steps)[0]
            # Everything after the cut is expected to be padding only.
            assert np.isnan(values[seq_len:]).all()
            values = values[:seq_len]
        seqs.append(
            lds.LinearDynamicalSystemSequence(np.zeros((seq_len, 1)),
                                              np.zeros((seq_len, hdim)),
                                              values))
    return seqs, labels
def parse_csv(filename, hdim):
    """Reads ECG data from csv file.

    Every row is read as: column 0 a key, column 1 the channel id, column 2
    the label, and the remaining columns the signal samples. A channel '0' row
    is buffered until the matching channel '1' row arrives, at which point one
    sequence and one label are emitted. Rows with any other channel id are
    parsed but otherwise ignored. Empty sample cells are read as NaN.

    Args:
        filename: Path of the csv file to read.
        hdim: Column count of the all-zero array passed as the second
            argument of each `lds.LinearDynamicalSystemSequence`.

    Returns:
        A tuple `(seqs, labels)` as returned by `drop_infreq_labels`, after
        `filter_type` has been applied when `FLAGS.filter_type` is truthy.

    Raises:
        ValueError: If `FLAGS.channel` is not 'both', '0' or '1', or if a
            non-empty sample cell cannot be converted to a float.
        AssertionError: If channel rows are not paired as expected, or the
            two rows of a pair disagree on length, label or key prefix.
    """
    labels = []
    seqs = []
    unprocessed_key = None
    unprocessed_label = None
    unprocessed_ch0 = None
    # Counts rows (not row pairs) that needed the NaN-aware fallback below.
    not_full_length = 0
    # `csv.reader` requires a text-mode file on Python 3 (binary mode raises
    # on the first row); `newline=''` lets the csv module handle line endings
    # itself, as its documentation recommends.
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            key = row[0]
            channel = row[1]
            label = row[2]
            channel_signal = np.array(row[3:]).reshape(-1, 1)
            try:
                channel_signal = channel_signal.astype(np.float32)
            except ValueError:
                # The bulk conversion fails when some cells are empty; parse
                # cell by cell and mark missing samples as NaN.
                # NOTE(review): this path yields float64 while the fast path
                # yields float32 -- confirm whether that is intended.
                channel_signal = np.array([
                    float(x) if x else np.nan for x in row[3:]
                ]).reshape(-1, 1)
                not_full_length += 1
            if channel == '0':
                # Buffer the first channel until its partner row shows up.
                assert unprocessed_ch0 is None
                unprocessed_ch0 = channel_signal
                unprocessed_key = key
                unprocessed_label = label
            elif channel == '1':
                assert unprocessed_ch0 is not None
                seq_len = len(channel_signal)
                assert len(unprocessed_ch0) == seq_len
                if FLAGS.channel == 'both':
                    vals = np.concatenate([unprocessed_ch0, channel_signal],
                                          axis=1)
                elif FLAGS.channel == '0':
                    vals = unprocessed_ch0
                elif FLAGS.channel == '1':
                    vals = channel_signal
                else:
                    raise ValueError('Unexpected FLAGS.channel value: %s' %
                                     FLAGS.channel)
                seqs.append(
                    lds.LinearDynamicalSystemSequence(
                        np.zeros((seq_len, 1)), np.zeros((seq_len, hdim)),
                        vals))
                # Both rows of a pair must carry the same label and share the
                # first two ':'-separated fields of their keys.
                assert label == unprocessed_label
                assert key.split(':')[:2] == unprocessed_key.split(':')[:2]
                labels.append(label)
                unprocessed_label = None
                unprocessed_key = None
                unprocessed_ch0 = None
    logging.info('Total seqs: %d, partial length seqs: %d.', len(seqs),
                 not_full_length)
    if FLAGS.filter_type:
        seqs, labels = filter_type(seqs, labels)
    seqs, labels = drop_infreq_labels(seqs, labels)
    return seqs, labels
def subsample(sequences, step_size=5):
    """Builds subsampled copies of the given sequences.

    For every sequence, its `inputs`, `hidden_states` and `outputs` are each
    passed through `_subsample_rows` with `step_size`, and the three results
    are wrapped in a new `lds.LinearDynamicalSystemSequence`.

    Args:
        sequences: Iterable of objects exposing `inputs`, `hidden_states` and
            `outputs` attributes.
        step_size: Forwarded unchanged to `_subsample_rows`.

    Returns:
        A list with one new `lds.LinearDynamicalSystemSequence` per element
        of `sequences`, in the same order.
    """
    return [
        lds.LinearDynamicalSystemSequence(
            _subsample_rows(seq.inputs, step_size),
            _subsample_rows(seq.hidden_states, step_size),
            _subsample_rows(seq.outputs, step_size))
        for seq in sequences
    ]