# Standard-library and third-party imports used by the entry points below.
import glob
import importlib
import logging
import os
import sys

import numpy as np
from scipy.spatial import ckdtree
from sklearn.model_selection import RandomizedSearchCV

# Project-local names (AlsNetContainer, Dataset, the dataset module, Logger,
# the architecture definitions and the loss functions simple_loss/fp_high_loss,
# as well as MAX_CLASSES) are assumed to be imported from the alsNet code base;
# their exact import paths are not part of this excerpt.


def main(args):
    arch = importlib.import_module(args.arch).arch
    normalize = args.normalize
    model = AlsNetContainer(num_feat=3, num_classes=6, num_points=50000,
                            output_base=args.outDir, arch=arch)
    logging.info("Loading pretrained model %s" % args.model)
    model.load_model(args.model)

    datasets = []
    if not os.path.exists(args.outDir):
        os.makedirs(args.outDir)
    for filepattern in args.inFiles:
        for file in glob.glob(filepattern):
            datasets.append(Dataset(file, load=False, normalize=normalize))
            logging.info("File %s loaded" % file)

    total_acc = 0
    total_batch = 0
    for idx, dataset in enumerate(datasets):
        logging.info("Loading dataset %d / %d (%s)" % (idx, len(datasets), dataset.filename))
        acc = model.test_single(dataset,
                                save_to=os.path.join(args.outDir,
                                                     os.path.basename(dataset.file).replace(".la", "_test.la")),
                                save_prob=True,
                                unload=False)
        logging.info("Current test accuracy: %.2f%%" % (acc * 100.))
        # mean x/y of the tile (mean over the points, i.e. axis 0)
        meanxy = np.mean(dataset._xyz, axis=0)[0:2]
        with open(os.path.join(args.outDir, 'result.csv'), 'a') as out_stat_file:
            out_stat_file.write("%s, %.3f, %.3f, %.4f\n" % (dataset.file, meanxy[0], meanxy[1], acc))
        dataset.unload()
        total_acc += acc
        total_batch += 1
        logging.info("Current avg test accuracy: %.2f%%" % ((total_acc / total_batch) * 100.))
        sys.stdout.flush()
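# The hyperparameter search below draws candidates from `param_distr`, which is
# defined elsewhere in the repository. The dictionary here is only an assumed
# sketch of such a search space over AlsNetContainer parameters that the runners
# vary (learning_rate, dropout); the distributions and ranges are hypothetical.
from scipy.stats import expon, uniform

param_distr = {
    'learning_rate': expon(scale=0.01),  # assumed: exponential prior around 1e-2
    'dropout': uniform(0.0, 0.6),        # assumed: uniform dropout in [0.0, 0.6]
}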
def main(args):
    inlist = args.inList
    threshold = args.threshold
    # train_size = args.trainSize
    with open(inlist, "rb") as f:
        _ = f.readline()  # remove header
        rest = f.readlines()

    datasets = []
    all_ds = []
    for line in rest:
        line = line.decode('utf-8')
        linespl = line.split(",")
        dataset_path = os.path.join(os.path.dirname(inlist), linespl[0])
        if float(linespl[1]) < threshold:
            datasets.append(dataset_path)
        all_ds.append(dataset_path)

    np.random.shuffle(datasets)
    datasets_th = []
    for idx, dataset in enumerate(datasets):
        print("Loading dataset %s of %s" % (idx + 1, len(datasets)))
        ds = Dataset(dataset, load=False)
        datasets_th.append(ds)
    print("%s datasets loaded." % len(datasets_th))
    sys.stdout.flush()

    rnd_search = RandomizedSearchCV(AlsNetContainer(num_feat=3, num_classes=30, num_points=200000,
                                                    output_base=args.outDir, score_sample=10),
                                    param_distr,
                                    n_iter=50,
                                    random_state=42,
                                    verbose=2,
                                    n_jobs=1)
    rnd_search.fit(datasets_th)
    print(rnd_search.best_params_)
def main(args):
    inlist = args.inList
    threshold = args.threshold
    train_size = args.trainSize
    with open(inlist, "rb") as f:
        _ = f.readline()  # remove header
        rest = f.readlines()

    datasets = []
    for line in rest:
        line = line.decode('utf-8')
        linespl = line.split(",")
        if float(linespl[1]) < threshold:
            datasets.append(os.path.join(os.path.dirname(inlist), linespl[0]))

    np.random.shuffle(datasets)
    inst = AlsNetContainer(num_points=200000, num_classes=30, num_feat=3, arch=arch,
                           output_dir=args.outDir, dropout=args.dropout)
    logg = Logger(outfile=os.path.join(args.outDir, 'alsNet-log.html'),
                  inst=inst,
                  training_files=datasets)

    for i in range(len(datasets) // train_size):
        if i > 0:
            test_ds = datasets[i * train_size + 1]
            inst.test_single(test_ds,
                             save_to=os.path.join(args.outDir,
                                                  os.path.basename(test_ds).replace(".la", "_test.la")),
                             save_prob=True)
        print("Training datasets %s to %s (%s total)" % (i * train_size,
                                                         min((i + 1) * train_size, len(datasets)),
                                                         len(datasets)))
        inst.fit_file(datasets[i * train_size:min((i + 1) * train_size, len(datasets))], new_session=False)
        logg.save()
def main(args):
    inlist = args.inList
    threshold = args.threshold
    train_size = args.trainSize
    # use the architecture module given on the command line if present, otherwise
    # fall back to the module-level default `arch` (a distinct local name avoids
    # shadowing the global inside this function)
    net_arch = importlib.import_module(args.archFile).arch if args.archFile else arch
    lr = args.learningRate
    normalize_vals = args.normalize == 1

    with open(inlist, "rb") as f:
        _ = f.readline()  # remove header
        rest = f.readlines()

    datasets = []
    all_ds = []
    for line in rest:
        line = line.decode('utf-8')
        linespl = line.split(",")
        dataset_path = os.path.join(os.path.dirname(inlist), linespl[0])
        # if float(linespl[1]) < threshold and float(linespl[6]) > args.minBuild:
        datasets.append(dataset_path)
        all_ds.append(dataset_path)

    np.random.shuffle(datasets)
    datasets_th = []
    for idx, dataset in enumerate(datasets):
        print("Loading dataset %s of %s (%s)" % (idx + 1, len(datasets), os.path.basename(dataset)))
        ds = Dataset(dataset, load=False, normalize=normalize_vals)
        datasets_th.append(ds)
    print("%s datasets loaded." % len(datasets_th))
    sys.stdout.flush()

    inst = AlsNetContainer(num_feat=0, num_classes=2, num_points=10000,
                           output_base=args.outDir,
                           score_sample=10,
                           arch=net_arch,
                           learning_rate=lr,
                           dropout=0.0,
                           loss_fn=simple_loss if args.lossFn == "simple" else fp_high_loss)
    if args.continueModel is not None:
        inst.load_model(args.continueModel)
    logg = Logger(outfile=os.path.join(args.outDir, 'alsNet-log.html'),
                  inst=inst,
                  training_files=datasets_th)

    for j in range(args.multiTrain):
        for i in range(len(datasets_th) // train_size):
            if i > 0:
                test_ds = datasets_th[min(i * train_size + 1, len(datasets_th) - 1)]
                inst.test_single(test_ds,
                                 save_to=os.path.join(args.outDir,
                                                      os.path.basename(test_ds.file).replace(".la", "_test.la")),
                                 save_prob=True)
            print("Training datasets %s to %s (%s total)" % (i * train_size,
                                                             min((i + 1) * train_size, len(datasets_th) - 1),
                                                             len(datasets_th)))
            inst.fit(datasets_th[i * train_size:min((i + 1) * train_size, len(datasets_th) - 1)],
                     new_session=False)
            logg.save()
            inst.save_model(os.path.join(args.outDir, 'models', 'model_%d_%d' % (j, i), 'alsNet.ckpt'))
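# Hypothetical invocation of the training entry point above. The real script
# builds `args` with argparse; the attribute names below are taken from the
# accesses inside main(), but the paths and values are only examples.
if __name__ == '__main__':
    from argparse import Namespace
    main(Namespace(
        inList="/data/als/stats.csv",   # CSV of tiles, first line is a header (example path)
        threshold=20,                   # example filter threshold (column 1 of the CSV)
        trainSize=10,                   # tiles per training chunk
        archFile="",                    # empty: keep the module-level default architecture
        learningRate=0.001,
        normalize=1,
        outDir="/data/als/out/",        # example output directory
        continueModel=None,             # or a path to an existing checkpoint
        multiTrain=5,                   # number of passes over the tile list
        lossFn="simple",                # "simple" or anything else for fp_high_loss
    ))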
def main(in_files, density, kNN, out_folder, thinFactor):
    # spacing derived from the batch size: half the side length of the square inscribed
    # in the circle that holds kNN * thinFactor points at the given density, with a 5% margin
    spacing = np.sqrt(kNN * thinFactor / (np.pi * density)) * np.sqrt(2) / 2 * 0.95
    print("Using a spacing of %.2f m" % spacing)
    print(out_folder)
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)

    # load trained model
    print("Loading trained model")
    model = AlsNetContainer(num_feat=3, num_classes=3, num_points=2000000,
                            output_base=out_folder, arch="")
    model.load_model("/mnt/ssd/shino/log_tm/models/alsNet.ckpt")

    statlist = [["Filename", "StdDev_Classes", "Ground", "Lo Veg", "Hi Veg"]]
    for file_pattern in in_files:
        print(file_pattern)
        for file in glob.glob(file_pattern):
            print("Loading file %s" % file)
            d = dataset.kNNBatchDataset(file=file, k=int(kNN * thinFactor), spacing=spacing)
            while True:
                print("Processing batch %d/%d" % (d.currIdx, d.num_batches))
                points_and_features, labels = d.getBatches(batch_size=1)
                idx_to_use = np.random.choice(range(int(thinFactor * kNN)), kNN)
                names = d.names
                out_name = d.filename.replace('.la', '_c%04d.la' % d.currIdx)  # laz or las
                # temporary result dir for the per-batch predictions
                out_name_tmp = os.path.splitext(os.path.basename(out_name))[0]
                out_folder_tmp = out_folder + out_name_tmp
                print(out_folder_tmp)
                if not os.path.exists(out_folder_tmp):
                    os.makedirs(out_folder_tmp)
                out_path = os.path.join(out_folder_tmp, out_name)
                print(out_path)
                if points_and_features is not None:
                    # predict the thinned batch and write it to the temporary folder
                    acc = model.test_single(points_and_features[0][idx_to_use],
                                            save_to=out_path, save_prob=False, unload=True)
                    # dataset.Dataset.Save(out_path, points_and_features[0][idx_to_use], names,
                    #                      labels=labels[0][idx_to_use], new_classes=None)
                else:
                    # no more data
                    break

            # prediction for this las file is finished -- merge the per-batch results
            print("Loading reference dataset")
            ref_ds = Dataset(file)
            ref_points = ref_ds._xyz
            out_labels = ref_ds.labels
            prob_sums = np.zeros((ref_points.shape[0], MAX_CLASSES))
            prob_counts = np.zeros((ref_points.shape[0],))
            print("Building 2D kD-Tree on the reference dataset")
            tree = ckdtree.cKDTree(ref_points[:, 0:2])  # only on 2D :D

            # get predicted las files
            input_files = os.listdir(out_folder_tmp)
            for filepattern in in_files:
                for file in glob.glob(filepattern):
                    input_files.append(file)

            for fileidx, file in enumerate(input_files):
                print("Processing file %d" % fileidx)
                ds = Dataset(file)
                points = np.hstack((ds.points_and_features, np.expand_dims(ds.labels, -1)))
                names = ds.names
                prob_ids_here = []
                prob_ids_ref = []
                for idx, name in enumerate(names):
                    if name.startswith('prob_class'):
                        prob_ids_here.append(idx + 3)
                        prob_ids_ref.append(int(name.split('prob_class')[-1]))
                for ptidx in range(points.shape[0]):
                    xy = points[ptidx, 0:2]
                    ref_ids = tree.query_ball_point(xy, r=0.0001, eps=0.0001)
                    if len(ref_ids) > 1:
                        ref_id = ref_ids[np.argmin(np.abs(ref_points[ref_ids, -1] - points[ptidx, 3]), axis=0)]
                    elif len(ref_ids) == 0:
                        print("Point not found: %s" % xy)
                        continue
                    else:
                        ref_id = ref_ids[0]
                    prob_counts[ref_id] += 1
                    probs_here = points[ptidx, prob_ids_here]
                    prob_sums[ref_id, prob_ids_ref] += probs_here
                del ds
                del points  # clear memory

            ref_ds = None
            out_points = ref_points
            print(prob_counts)
            print(prob_sums[ref_id, :])
            prob_avgs = prob_sums / prob_counts[:, np.newaxis]
            print(prob_avgs)
            print(prob_avgs[ref_id, :])

            # per-point class = argmax of the averaged class probabilities
            new_max_class = np.zeros((ref_points.shape[0]))
            for i in range(ref_points.shape[0]):
                curr_point = prob_sums[i, :] / prob_counts[i]
                curr_point_max = np.argmax(curr_point)
                new_max_class[i] = curr_point_max

            final = np.zeros((ref_points.shape[0], 4))
            final[:, :3] = ref_points[:, :3]
            # remap the internal class indices to the output class codes
            new_max_class = np.where(new_max_class == 2, 6, new_max_class)
            new_max_class = np.where(new_max_class == 0, 2, new_max_class)
            new_max_class = np.where(new_max_class == 1, 6, new_max_class)
            final[:, 3] = new_max_class

            # save merged data
            out_name_fin = os.path.splitext(os.path.basename(out_name))[0]
            out_folder_fin = out_folder + "_" + out_name_fin
            if not os.path.exists(out_folder_fin):
                os.makedirs(out_folder_fin)
            # write x, y, z and the merged class into the result folder created above
            savename = os.path.join(out_folder_fin, out_name_fin + ".txt")
            np.savetxt(savename, final)
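# Example of how the prediction/merging entry point above might be called;
# all values and paths are assumptions, not taken from the repository.
# main(in_files=["/data/als/tiles/*.laz"],  # hypothetical LAS/LAZ input pattern
#      density=15,                          # assumed point density in pts/m^2
#      kNN=200000,                          # points per batch fed to the network
#      out_folder="/data/als/prediction/",
#      thinFactor=1)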