def read_and_extract_features(reader, count):
    """Read `count` examples from `reader` and extract their feature matrices.

    Data is pulled in chunks of up to 1000 examples to bound peak memory.
    Unlike the original version, `count` no longer has to be a multiple of
    the chunk size: the final, smaller remainder chunk is read as well
    instead of failing an `assert` (which would also vanish under `-O`).

    Args:
        reader: dataset reader consumed via `utils.read_chunk`.
        count:  total number of examples to read (positive int).

    Returns:
        (Xs, ys) where Xs is the per-chunk feature matrices stacked along
        axis 0 with `np.concatenate`, and ys is the flat list of labels.
    """
    read_chunk_size = 1000
    Xs = []
    ys = []
    remaining = count
    while remaining > 0:
        # Read at most one full chunk; the last iteration picks up the
        # remainder that the old `count % read_chunk_size == 0` assert forbade.
        current_size = min(read_chunk_size, remaining)
        (chunk, ts, y, header) = utils.read_chunk(reader, current_size)
        X = common_utils.extract_features_from_rawdata(
            chunk, header, args.period, args.features)
        Xs.append(X)
        ys += y
        remaining -= current_size
    Xs = np.concatenate(Xs, axis=0)
    return (Xs, ys)
def read_and_extract_features(reader, count):
    """Read `count` examples, extract features, and bin the labels.

    Bug fixed: the multiple-of-chunk-size assertion had been commented out,
    but the loop still iterated `count // read_chunk_size` times, silently
    dropping up to `read_chunk_size - 1` trailing examples whenever `count`
    was not an exact multiple.  The remainder chunk is now read explicitly.

    Args:
        reader: dataset reader consumed via `utils.read_chunk`.
        count:  total number of examples to read (positive int).

    Returns:
        (Xs, bins, ys): stacked feature matrix, one-hot custom-bin encoding
        of each label (via `metrics.get_bin_custom` — bin semantics defined
        elsewhere in the project), and the raw label list.
    """
    read_chunk_size = 1000
    Xs = []
    ys = []
    remaining = count
    while remaining > 0:
        # Read at most one full chunk; the final iteration reads whatever
        # remainder is left instead of dropping it on the floor.
        current_size = min(read_chunk_size, remaining)
        (chunk, ts, y, header) = utils.read_chunk(reader, current_size)
        X = common_utils.extract_features_from_rawdata(
            chunk, header, args.period, args.features)
        Xs.append(X)
        ys += y
        remaining -= current_size
    Xs = np.concatenate(Xs, axis=0)
    # NOTE(review): `one_hot` and `nbins` are module-level names not visible
    # in this block — presumed defined alongside `metrics`; verify upstream.
    bins = np.array([one_hot(metrics.get_bin_custom(x, nbins)) for x in ys])
    return (Xs, bins, ys)
def process_one_chunk(mode, chunk_index):
    """Run one chunk of data through the network in train or test mode.

    Reads `chunk_size` examples from the module-level `train_reader` (train)
    or `val_reader` (test), preprocesses them, installs them as the network's
    current dataset, then steps through every batch, accumulating loss and
    collecting predictions.  Prints running loss every `args.log_every`
    batches and a final metrics report chosen by `args.network`.

    Returns the mean per-batch loss over the chunk.
    Raises if any batch loss is NaN.

    NOTE(review): relies on module-level state (`train_reader`, `val_reader`,
    `network`, `discretizer`, `normalizer`, `chunk_size`, `args`) — presumed
    initialized by the surrounding script before this is called.
    """
    assert (mode == "train" or mode == "test")
    if (mode == "train"):
        reader = train_reader
    if (mode == "test"):
        reader = val_reader

    (data, ts, ys, header) = utils.read_chunk(reader, chunk_size)
    data = utils.preprocess_chunk(data, ts, discretizer, normalizer)

    # Install this chunk as the network's train or validation set
    # (the other slot is cleared with None), then shuffle.
    # NOTE(review): shuffle is called in both modes — presumably it only
    # affects the train split; confirm against the network implementation.
    if (mode == "train"):
        network.set_datasets((data, ys), None)
    if (mode == "test"):
        network.set_datasets(None, (data, ys))
    network.shuffle_train_set()

    y_true = []        # ground-truth labels collected across batches
    predictions = []   # model outputs collected across batches
    avg_loss = 0.0     # running loss, reset after each log line
    sum_loss = 0.0     # total loss over the whole chunk
    prev_time = time.time()

    n_batches = network.get_batches_per_epoch(mode)
    for i in range(0, n_batches):
        step_data = network.step(mode)
        prediction = step_data["prediction"]
        answers = step_data["answers"]
        current_loss = step_data["current_loss"]
        current_loss_mse = step_data["loss_mse"]
        current_loss_reg = step_data["loss_reg"]
        log = step_data["log"]

        avg_loss += current_loss
        sum_loss += current_loss

        for x in answers:
            y_true.append(x)

        for x in prediction:
            predictions.append(x)

        # Periodic progress line: total loss = mse + regularization terms,
        # plus the windowed average since the previous log line.
        if ((i + 1) % args.log_every == 0):
            cur_time = time.time()
            print (" %sing: %d.%d / %d \t loss: %.3f = %.3f + %.3f \t avg_loss: %.3f \t"\
                "%s \t time: %.2fs" % (mode, chunk_index, i * args.batch_size,
                       n_batches * args.batch_size, current_loss,
                       current_loss_mse, current_loss_reg,
                       avg_loss / args.log_every, log, cur_time - prev_time))
            avg_loss = 0
            prev_time = cur_time

        # Fail fast on divergence rather than continuing to train on NaNs.
        if np.isnan(current_loss):
            raise Exception("current loss IS NaN. This should never happen :)")

    sum_loss /= n_batches
    print "\n %s loss = %.5f" % (mode, sum_loss)

    # Metric report depends on the network head: raw regression vs.
    # log-binned vs. custom-binned classification.
    if args.network in ['lstm', 'lstm_log']:
        metrics.print_metrics_regression(y_true, predictions)
    if args.network == 'lstm_cf_log':
        metrics.print_metrics_log_bins(y_true, predictions)
    if args.network == 'lstm_cf_custom':
        metrics.print_metrics_custom_bins(y_true, predictions)

    return sum_loss
test_reader = LengthOfStayReader( dataset_dir='../../data/length-of-stay/test/', listfile='../../data/length-of-stay/test_listfile.csv') n_batches = test_reader.get_number_of_examples() // args.batch_size y_true = [] predictions = [] avg_loss = 0.0 sum_loss = 0.0 prev_time = time.time() n_batches = 1000 # TODO: remove this, to test on full data for i in range(n_batches): (data, ts, ys, header) = utils.read_chunk(test_reader, args.batch_size) data = utils.preprocess_chunk(data, ts, discretizer, normalizer) ret = network.predict((data, ys)) prediction = ret[0] current_loss = ret[1] avg_loss += current_loss sum_loss += current_loss for x in ys: y_true.append(x) for x in prediction: predictions.append(x) if ((i + 1) % args.log_every == 0):