# Example 1
def read_and_extract_features(reader, count):
    """Read `count` examples from `reader` in fixed-size chunks and extract features.

    Args:
        reader: dataset reader consumed by `utils.read_chunk`.
        count: total number of examples to read; must be a positive multiple
            of the internal chunk size (1000).

    Returns:
        Tuple `(Xs, ys)` where `Xs` is a single numpy array of stacked
        per-example feature rows and `ys` is a flat list of labels.

    Raises:
        ValueError: if `count` is not a multiple of the chunk size.
    """
    read_chunk_size = 1000
    # Explicit check instead of `assert`: asserts are stripped under `python -O`.
    if count % read_chunk_size != 0:
        raise ValueError("count must be a multiple of %d, got %d"
                         % (read_chunk_size, count))
    Xs = []
    ys = []
    for _ in range(count // read_chunk_size):
        (chunk, ts, y, header) = utils.read_chunk(reader, read_chunk_size)
        # NOTE(review): args.period / args.features are module-level globals,
        # presumably parsed CLI arguments — confirm against the caller.
        X = common_utils.extract_features_from_rawdata(chunk, header,
                                                       args.period, args.features)
        Xs.append(X)
        ys += y
    Xs = np.concatenate(Xs, axis=0)
    return (Xs, ys)
# Example 2
def read_and_extract_features(reader, count):
    """Read up to `count` examples in chunks of 1000 and extract features plus bins.

    Unlike the strict variant, `count` need not be a multiple of the chunk
    size; any remainder (`count % 1000` examples) is silently skipped.

    Args:
        reader: dataset reader consumed by `utils.read_chunk`.
        count: upper bound on the number of examples to read.

    Returns:
        Tuple `(Xs, bins, ys)`: stacked feature array, one-hot encoded
        custom-bin targets (numpy array), and the flat list of raw labels.
    """
    read_chunk_size = 1000
    Xs = []
    ys = []
    # Integer division: the trailing `count % read_chunk_size` examples
    # are never read.
    for _ in range(count // read_chunk_size):
        (chunk, ts, y, header) = utils.read_chunk(reader, read_chunk_size)
        # NOTE(review): args.period / args.features are module-level globals,
        # presumably parsed CLI arguments — confirm against the caller.
        X = common_utils.extract_features_from_rawdata(chunk, header,
                                                       args.period,
                                                       args.features)
        Xs.append(X)
        ys += y
    Xs = np.concatenate(Xs, axis=0)
    # One-hot encode each label's custom bin; `one_hot`, `metrics` and
    # `nbins` are module-level globals.
    bins = np.array([one_hot(metrics.get_bin_custom(x, nbins)) for x in ys])
    return (Xs, bins, ys)
# Example 3
def process_one_chunk(mode, chunk_index):
    """Run one data chunk through the network in training or evaluation mode.

    Reads `chunk_size` examples from the appropriate reader, preprocesses
    them, installs them as the network's train or validation set, then steps
    through every batch, accumulating losses and collecting predictions.
    Progress is printed every `args.log_every` batches, and per-network
    metrics are printed at the end.

    Args:
        mode: either "train" (uses `train_reader`) or "test" (uses
            `val_reader`).
        chunk_index: index of the current chunk; used only in log output.

    Returns:
        Mean loss over all batches of the chunk.

    Raises:
        ValueError: if `mode` is not "train" or "test".
        Exception: if a batch loss becomes NaN.
    """
    # Explicit validation instead of `assert` (stripped under `python -O`);
    # the single if/elif/else also replaces the duplicated mode checks.
    if mode == "train":
        reader = train_reader
    elif mode == "test":
        reader = val_reader
    else:
        raise ValueError("mode must be 'train' or 'test', got %r" % (mode,))

    (data, ts, ys, header) = utils.read_chunk(reader, chunk_size)
    data = utils.preprocess_chunk(data, ts, discretizer, normalizer)

    if mode == "train":
        network.set_datasets((data, ys), None)
    else:
        network.set_datasets(None, (data, ys))

    # NOTE(review): shuffling is invoked in both modes; presumably a no-op
    # when no training set is installed — confirm against the network class.
    network.shuffle_train_set()

    y_true = []
    predictions = []
    avg_loss = 0.0   # running loss since the last log line
    sum_loss = 0.0   # total loss over the whole chunk
    prev_time = time.time()
    n_batches = network.get_batches_per_epoch(mode)

    for i in range(n_batches):
        step_data = network.step(mode)
        prediction = step_data["prediction"]
        answers = step_data["answers"]
        current_loss = step_data["current_loss"]
        current_loss_mse = step_data["loss_mse"]
        current_loss_reg = step_data["loss_reg"]
        log = step_data["log"]

        avg_loss += current_loss
        sum_loss += current_loss

        y_true.extend(answers)
        predictions.extend(prediction)

        if (i + 1) % args.log_every == 0:
            cur_time = time.time()
            print("  %sing: %d.%d / %d \t loss: %.3f = %.3f + %.3f \t avg_loss: %.3f \t"
                  "%s \t time: %.2fs" % (mode, chunk_index, i * args.batch_size,
                        n_batches * args.batch_size,
                        current_loss, current_loss_mse, current_loss_reg,
                        avg_loss / args.log_every, log, cur_time - prev_time))
            avg_loss = 0
            prev_time = cur_time

        if np.isnan(current_loss):
            raise Exception("current loss IS NaN. This should never happen :)")

    sum_loss /= n_batches
    # Parenthesized single-argument print: valid in both Python 2 and 3
    # (matches the form already used above, fixing the bare Py2 statement).
    print("\n  %s loss = %.5f" % (mode, sum_loss))

    if args.network in ['lstm', 'lstm_log']:
        metrics.print_metrics_regression(y_true, predictions)
    elif args.network == 'lstm_cf_log':
        metrics.print_metrics_log_bins(y_true, predictions)
    elif args.network == 'lstm_cf_custom':
        metrics.print_metrics_custom_bins(y_true, predictions)

    return sum_loss
# Example 4
    test_reader = LengthOfStayReader(
        dataset_dir='../../data/length-of-stay/test/',
        listfile='../../data/length-of-stay/test_listfile.csv')

    n_batches = test_reader.get_number_of_examples() // args.batch_size
    y_true = []
    predictions = []
    avg_loss = 0.0
    sum_loss = 0.0
    prev_time = time.time()

    n_batches = 1000  # TODO: remove this, to test on full data

    for i in range(n_batches):
        (data, ts, ys, header) = utils.read_chunk(test_reader, args.batch_size)
        data = utils.preprocess_chunk(data, ts, discretizer, normalizer)
        ret = network.predict((data, ys))
        prediction = ret[0]
        current_loss = ret[1]

        avg_loss += current_loss
        sum_loss += current_loss

        for x in ys:
            y_true.append(x)

        for x in prediction:
            predictions.append(x)

        if ((i + 1) % args.log_every == 0):