Example #1
def xml_to_yolo_4(boxes, label_map, save_dir="yolo/", ratio=0.8, shuffle=True, no_obj_dir=None):
	save_dir = Path(save_dir)

	train_dir = save_dir / "train"
	valid_dir = save_dir / "val"

	save_dir.mkdir(parents=True, exist_ok=True)
	train_dir.mkdir(exist_ok=True)
	valid_dir.mkdir(exist_ok=True)

	boxes_by_name = boxes.getBoxesBy(lambda box: box.getImageName())
	image_names = sorted(boxes_by_name.keys())
	nb_train = round(ratio * len(boxes_by_name))
	new_names = []

	if shuffle:
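		# A fixed seed keeps the train/val split reproducible across runs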
		random_gen = random.Random(498_562_751)
		image_names = random_gen.sample(image_names, len(image_names))

	for i, image_name in tenumerate(image_names, unit="img"):
		image_boxes = boxes_by_name[image_name]
		folder = train_dir if i < nb_train else valid_dir

		new_image_name = folder / Path(image_name).with_stem(f"im_{i:06}").name
		new_names.append(new_image_name)
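		# One label line per box: "<label> <x_center> <y_center> <width> <height>", coordinates relative to the image size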
		description = "\n".join("{} {} {} {} {}".format(label_map[box.getClassId()], *box.getRelativeBoundingBox()) for box in image_boxes)

		new_image_name.with_suffix(".txt").write_text(description)
		shutil.copy(image_name, new_image_name)

	(save_dir / "train.txt").write_text("".join(f"{Path('data/train') / n.name}\n" for n in new_names[:nb_train]))
	(save_dir / "val.txt").write_text("".join(f"{Path('data/val') / n.name}\n" for n in new_names[nb_train:]))

	if no_obj_dir:
		no_obj_dir = Path(no_obj_dir)
		image_names = sorted(list(no_obj_dir.glob("*.jpg")))
		nb_train = round(ratio * len(image_names))
		new_names = []

		if shuffle:
			rand_gen = random.Random(478_737_303)
			image_names = rand_gen.sample(image_names, len(image_names))
		
		for i, image_name in tenumerate(image_names):
			folder = train_dir if i < nb_train else valid_dir
			new_image_name = folder / Path(image_name).with_stem(f"im_no_obj_{i:06}").name
			new_names.append(new_image_name)
			shutil.copy(image_name, new_image_name)
			new_image_name.with_suffix(".txt").touch()
		
		with (save_dir / "train.txt").open("a") as f:
			f.write("".join(f"{Path('data/train') / n.name}\n" for n in new_names[:nb_train]))

		with (save_dir / "val.txt").open("a") as f:
			f.write("".join(f"{Path('data/val') / n.name}\n" for n in new_names[nb_train:]))
Example #2
 def evaluation_measures(self):
     df_path = []
     df_match = []
     for i, j in tenumerate(self.select_rows()):
         a = converting_path_to_xy(j[1])
         b = converting_path_to_xy(j[0])
         df_path.append(a)
         df_match.append(b)
     dist_frech_cut = []
     dist_frech_full = []
     arc_length_diff_cut = []
     arc_length_diff_full = []
     tracked_vehicle = []
     mode = []
     for i, j in tenumerate(df_path):
         tracked_vehicle.append(j['Tracked Vehicle'].values[0])
         mode.append(j['Type'].values[0])
         p = j.loc[:, ['x', 'y']]
         q = df_match[i].loc[:, ['x', 'y']]
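         # Fewer than 3 points: the endpoint-trimmed ("cut") measures are undefined, so record 0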
         if len(j) < 3:
             dist_frech_cut.append(0)
             arc_length_diff_cut.append(0)
             d2 = similaritymeasures.frechet_dist(p.values, q.values)
             dist_frech_full.append(d2)
             l_p_f = similaritymeasures.get_arc_length(p.values)
             l_p2 = l_p_f[0]
             l_m_f = similaritymeasures.get_arc_length(q.values)
             l_m2 = l_m_f[0]
             arc_length_diff_full.append(round(abs(l_p2 - l_m2), 3))
             continue
         d1 = similaritymeasures.frechet_dist(p.values[1:-1],
                                              q.values[1:-1])
         d2 = similaritymeasures.frechet_dist(p.values, q.values)
         l_p = similaritymeasures.get_arc_length(p.values[1:-1])
         l_p1 = l_p[0]
         l_m = similaritymeasures.get_arc_length(q.values[1:-1])
         l_m1 = l_m[0]
         l_p_f = similaritymeasures.get_arc_length(p.values)
         l_p2 = l_p_f[0]
         l_m_f = similaritymeasures.get_arc_length(q.values)
         l_m2 = l_m_f[0]
         dist_frech_full.append(d2)
         dist_frech_cut.append(d1)
         arc_length_diff_cut.append(round(abs(l_p1 - l_m1), 3))
         arc_length_diff_full.append(round(abs(l_p2 - l_m2), 3))
     evaluation = {
         'ID': tracked_vehicle,
         'Type': mode,
         'Frechet_distance': dist_frech_full,
         'Frechet_distance_cut': dist_frech_cut,
         'Length_difference': arc_length_diff_full,
         'Length_difference_cut': arc_length_diff_cut
     }
     evaluation = pd.DataFrame(evaluation)
     return evaluation
Example #3
def test_enumerate():
    """Test contrib.tenumerate"""
    with closing(StringIO()) as our_file:
        a = range(9)
        assert list(tenumerate(a, file=our_file)) == list(enumerate(a))
        assert list(tenumerate(a, 42, file=our_file)) == list(enumerate(a, 42))
    with closing(StringIO()) as our_file:
        _ = list(tenumerate((i for i in a), file=our_file))
        assert "100%" not in our_file.getvalue()
    with closing(StringIO()) as our_file:
        _ = list(tenumerate((i for i in a), file=our_file, total=len(a)))
        assert "100%" in our_file.getvalue()
 def fraction_wrongly_matched(self, threshold_angle=45):
     wrongly_matched = {
         'id': [],
         'type': [],
         'wrong_1': [],
         'average_speed_1': [],
         'bool_w1': [],
         'wrong_2': [],
         'average_speed_2': [],
         'bool_w2': [],
         'wrong_both': [],
         'bool_wb': [],
         'length_trajectory': []
     }
     for ind, traj in tenumerate(self.tracks_line):
         wrongly_matched['id'].append((ind, traj['track_id'].values[0]))
         wrongly_matched['type'].append(traj['type'].values[0])
         wm = traj['wrong_match'].values
         speeds = traj[['speed_x', 'speed_y']].values
         w1 = [
             speeds[i][0] for i, j in enumerate(wm)
             if j[0] > threshold_angle
         ]
         w2 = [
             speeds[i][1] for i, j in enumerate(wm)
             if j[1] > threshold_angle
         ]
         wb = [
             i for i, j in enumerate(wm)
             if j[0] > threshold_angle and j[1] > threshold_angle
         ]
         wrongly_matched['wrong_1'].append(
             round(len(w1) / len(traj) * 100, 1))
         wrongly_matched['wrong_2'].append(
             round(len(w2) / len(traj) * 100, 1))
         wrongly_matched['wrong_both'].append(
             round(len(wb) / len(traj) * 100, 1))
         wrongly_matched['bool_w1'].append(bool(w1))
         wrongly_matched['bool_w2'].append(bool(w2))
         wrongly_matched['bool_wb'].append(bool(wb))
         wrongly_matched['average_speed_1'].append(np.mean(w1) if w1 else 0)
         wrongly_matched['average_speed_2'].append(np.mean(w2) if w2 else 0)
         wrongly_matched['length_trajectory'].append(len(traj))
     wrongly_matched = pd.DataFrame(wrongly_matched)
     return wrongly_matched
Example #5
def get_label_probabilities(brainweb_file,
                            labels=None,
                            outres="mMR",
                            progress=True,
                            dtype=np.float32):
    """
    @param labels  : list of strings, [default: Act.all_labels]
    @return out  : 4D array of masks resampled as per `outres` (useful for PVC)
    """
    out_shape = getattr(Shape, outres)
    raw_data = load_file(brainweb_file)
    if labels is None:
        labels = Act.all_labels
    if set(labels).difference(Act.all_labels):
        raise KeyError("labels (%s) must be in Act.all_labels (%s)" %
                       (", ".join(labels), ", ".join(Act.all_labels)))

    num_classes = len(labels)
    res = np.zeros((num_classes, ) + tuple(out_shape), dtype=dtype)
    for i, attr in tenumerate(labels,
                              unit="label",
                              desc="BrainWeb labels",
                              disable=not progress):

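        # Isolate one label per iteration: a throwaway Act subclass with only this attr set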
        class MAct(Act):
            attrs = [attr]

        setattr(MAct, attr, 1)
        res[i] = toPetMmr(raw_data, outres=outres, modes=[MAct])[0][:, ::-1]

    return res
Example #6
 def match_fixed_distance(self, list_index=None):
     # logger = lm.logger
     # logger.setLevel(logging.DEBUG)
     # logger.addHandler(logging.StreamHandler(sys.stdout))
     tic = time.time()
     traj_mov_match = []
     special_cases = []
     point_traj = self.list_traj
     if list_index is not None:
         point_traj = [
             j for i, j in enumerate(point_traj) if i in list_index
         ]
     for i, j in tenumerate(point_traj):
         try:
             traj = map_matching(j, self.gdf_netw, self.map,
                                 self.max_init, self.max_d)
             traj_mov_match.append(traj)
         except Exception:
             special_cases.append(j)
     toc = time.time()
     print(
         f'{int(divmod(toc - tic, 60)[0])} min {int(divmod(toc - tic, 60)[1])} sec'
     )
     return traj_mov_match, special_cases
Example #7
def get_data(
    use_cache: bool = True, num_wavelens: int = 300
) -> Tuple[LaserParams, Emiss, torch.LongTensor]:
    """Data is sorted in ascending order of wavelength."""
    if all(
        [
            use_cache,
            Path("/data-new/alok/laser/data.pt").exists(),
        ]
    ):
        data = torch.load(Path("/data-new/alok/laser/data.pt"))
        norm_laser_params, interp_emissivities, uids = (
            data["normalized_laser_params"],
            data["interpolated_emissivity"],
            data["uids"],
        )

        # XXX check length to avoid bugs.
        if interp_emissivities.shape[-1] == num_wavelens:
            return norm_laser_params, interp_emissivities, uids

    client = pymongo.MongoClient(
        "mongodb://*****:*****@mongodb07.nersc.gov/propopt"
    )
    db = client.propopt.laser_samples2
    laser_params, emissivity, wavelength = [], [], []
    interp_emissivities, interp_wavelengths = [], []
    uids = []
    # TODO: clean up and generalize when needed
    # the values are indexes for one hot vectorization
    wattage_idxs = {
        0.2: 0,
        0.3: 1,
        0.4: 2,
        0.5: 3,
        0.6: 4,
        0.7: 5,
        0.8: 6,
        0.9: 7,
        1.0: 8,
        1.1: 9,
        1.2: 10,
        1.3: 11,
        # these last 2 wattages are problematic since their
        # emissivities are different lengths
        # 1.4: 12,
        # 1.5: 13,
    }

    # TODO: relax this to all wattages, try discretizing them w/
    # softmax instead
    for uid, entry in tenumerate(db.find()):
        # TODO: ensure that this is sorted by wavelength
        # TODO log transform?
        emiss_plot: List[float] = [
            e
            for ex in entry["emissivity_spectrum"]
            if ((e := ex["normal_emissivity"]) != 1.0 and ex["wavelength_micron"] < 12)
        ]
Example #8
def test_enumerate_numpy():
    """Test contrib.tenumerate(numpy.ndarray)"""
    try:
        import numpy as np
    except ImportError:
        raise SkipTest
    with closing(StringIO()) as our_file:
        a = np.random.random((42, 1337))
        assert list(tenumerate(a, file=our_file)) == list(np.ndenumerate(a))
Example #9
    def get_progress_bar(self, data_loader, total, description):
        if get_rank() == 0:
            pbar = tenumerate(data_loader, total=total // data_loader.batch_size + 1, desc=description, leave=False)
        else:
            pbar = enumerate(data_loader)

        yield pbar

        if hasattr(pbar, 'close'):
            pbar.close()
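
A minimal usage sketch for the helper above; `trainer`, `data_loader`, and `dataset` are illustrative names, and it assumes the method is wrapped with contextlib.contextmanager (the decorator is not shown in the snippet):

# Hypothetical usage, assuming get_progress_bar is decorated with
# contextlib.contextmanager (the decorator is not shown in this snippet):
with trainer.get_progress_bar(data_loader, total=len(dataset), description="train") as progress:
    for step, batch in progress:
        ...  # one training step; only rank 0 renders the tqdm bar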
Example #10
 def make_line_trajectories(self):
     traj_line_match = []
     gdf_netw = self.used_network
     for i, j in tenumerate(self.point_trajectories):
         tr_m = pd.merge(j,
                         gdf_netw[['bearing', 'edge']],
                         how='left',
                         on=['edge'])
         tr_m = tr_m.rename(columns={
             'bearing_x': 'bearing',
             'bearing_y': 'bearing_edge'
         })
         diff = tr_m[['bearing', 'bearing_edge']].values
         bearing_diff = [
             round(abs(diff[a][0] - diff[a][1]), 1)
             for a in range(0, len(tr_m))
         ]
         for a, b in enumerate(bearing_diff):
             if b > 180:
                 bearing_diff[a] = round(360 - b, 1)
         j['wrong_match'] = bearing_diff
         # point dataset with nodes of matched edge, this adds column to all original dataframes (chained assignment)
         tr = j[:-1]
         # making line dataset --> always start and end point --> last point has no successive point --> -1
         u_edge = j['edge'].values[:-1]
         v_edge = j['edge'].values[1:]
         w_1 = j['wrong_match'].values[:-1]
         w_2 = j['wrong_match'].values[1:]
         w = tuple(zip(w_1, w_2))
         c = {
             'u_match': u_edge,
             'v_match': v_edge,
             'time': tr['time'].values + 1000,
             'wrong_match': w
         }
         df = pd.DataFrame(c)
         p = [
             LineString([j['geometry'].values[k], j['geometry'].values[k + 1]])
             for k in range(len(j) - 1)
         ]
         tr = tr.drop([
             'geometry', 'time', 'N1_match', 'N2_match', 'wrong_match',
             'edge'
         ],
                      axis=1)
         tr = pd.concat([tr, df], axis=1)
         tr = gpd.GeoDataFrame(tr, geometry=p)
         tr = pd.merge(tr, j.iloc[1:, 8:15], how='inner', on=['time'])
         traj_line_match.append(tr)
     self.line_trajectories = traj_line_match
Example #11
def xml_to_yolo_3(boundingBoxes, yolo_dir, names_to_labels, ratio=0.8, shuffled=True):
	train_dir = os.path.join(yolo_dir, 'train')
	val_dir = os.path.join(yolo_dir, 'val')
	train_file = os.path.join(yolo_dir, 'train.txt')
	val_file = os.path.join(yolo_dir, 'val.txt')

	if not os.path.isdir(yolo_dir):
		os.mkdir(yolo_dir)
	if not os.path.isdir(train_dir):
		os.mkdir(train_dir)
	if not os.path.isdir(val_dir):
		os.mkdir(val_dir)

	boxes_by_name = boundingBoxes.getBoxesBy(lambda box: box.getImageName())
	names = sorted(boxes_by_name.keys())
	new_names = []

	if shuffled:
		random_gen = random.Random(498_562_751)
		names = random_gen.sample(names, len(names))

	number_train = round(ratio*len(names))

	for (i, name) in tenumerate(names):
		yolo_rep = []
		img_path = os.path.splitext(name)[0] + '.jpg'
		identifier = 'im_{}'.format(i)
		new_names.append(identifier + ".jpg")

		save_dir = train_dir if i < number_train else val_dir
		for box in boxes_by_name[name]:
			label = names_to_labels[box.getClassId()]
			x, y, w, h = box.getRelativeBoundingBox()

			yolo_rep.append('{} {} {} {} {}\n'.format(label, x, y, w, h))

		with open(os.path.join(save_dir, identifier + '.txt'), 'w') as f_write:
			f_write.writelines(yolo_rep)

		shutil.copy(img_path, os.path.join(save_dir, identifier + '.jpg'))

	with open(train_file, "w") as f:
		for item in new_names[:number_train]:
			relative_path = os.path.split(item)[1]
			new_path = os.path.join("data/train/", relative_path)
			f.write(new_path + "\n")

	with open(val_file, "w") as f:
		for item in new_names[number_train:]:
			relative_path = os.path.split(item)[1]
			new_path = os.path.join("data/val/", relative_path)
			f.write(new_path + "\n")
Example #12
def evaluate_reconstruction(autoencoder: StyleganAutoencoder, data_loaders: dict) -> dict:
    metrics = defaultdict(list)
    psnr_ssim_evaluator = PSNRSSIMEvaluator()

    for i, batch in tenumerate(data_loaders['test'], desc="psnr_ssim", leave=False):
        batch = {k: v.to('cuda') for k, v in batch.items()}
        with torch.no_grad():
            denoised = autoencoder(batch['input_image'])

        psnr, ssim = psnr_ssim_evaluator.psnr_and_ssim(denoised, batch['output_image'])

        metrics['psnr'].append(float(psnr.cpu().numpy()))
        metrics['ssim'].append(float(ssim.cpu().numpy()))
    metrics = {k: statistics.mean(v) for k, v in metrics.items()}
    return metrics
Example #13
def download_work(out_dir, work_url):
    work_req = requests.get(work_url)
    work_name = work_url.split("/")[4]
    work_dir = out_dir / work_name
    work_dir.mkdir(exist_ok=True)
    soup = BeautifulSoup(work_req.text, features="html.parser")
    image_tags = soup.find_all(class_="panelarea")
    for i, tag in tenumerate(image_tags, desc=f"{work_name}"):
        image_url = tag.get("href")
        output_name = image_url.split("/")[-1]
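        # Zero-pad the panel index so downloaded files sort in reading order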
        output_filename = work_dir / f"{i:03}-{output_name}"

        image_req = requests.get(image_url)
        with open(output_filename, "wb") as f:
            f.write(image_req.content)
Example #14
def _run_solver(solver,
                t_span: Tuple[float, float],
                dt: float,
                desc: str = "solver") -> SolverResult:
    """Given a numerical integrator, call its 'step' method T/dt times (where T is last element of t_span)."""
    t_start, t_end = t_span
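    # arange's stop bound is exclusive, so extend by dt to make t_end part of the grid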
    t_arr = np.arange(t_start, t_end + dt, dt)
    y_arr = np.zeros(shape=(len(t_arr), len(solver.y)))
    logger.info(f"{len(t_arr)} iterations to do...")
    for i, t in tenumerate(t_arr, desc=desc):
        y_arr[i] = solver.y
        solver.step(t, dt)
    # add final y
    y_arr[-1] = solver.y
    return SolverResult(t_arr, y_arr.T, None, None, None, 0, 0, 0, 1,
                        "success", True)
Example #15
 def match_variable_distance(self, list_index=None):
     # logger = lm.logger
     # logger.setLevel(logging.DEBUG)
     # logger.addHandler(logging.StreamHandler(sys.stdout))
     tic = time.time()
     traj_mov_match = []
     fails = []
     point_traj = self.list_traj
     if list_index is not None:
         point_traj = [
             j for i, j in enumerate(point_traj) if i in list_index
         ]
     for i, j in tenumerate(point_traj):
         start_time = time.time()
         dist_init = self.max_init
         dist = self.max_d
         fail = 0
         while True:
             try:
                 traj = map_matching(j, self.gdf_netw, self.map, dist_init,
                                     dist)
                 traj_mov_match.append(traj)
                 break
             except Exception:
                 if fail < 3:
                     # print('Set distance higher:')
                     dist += 5
                     fail = fail + 1
                     # print(dist)
                     # print('Number of fails: ' + str(fail))
                 elif 2 < fail <= 10:
                     dist += 10
                     fail = fail + 1
                     # print('Set distance higher:')
                     # print(dist)
                     # print('Number of fails: ' + str(fail))
                 elif fail > 10:
                     dist += 10
                     dist_init += 50
                     fail += 1
                 # print('Still at list ' + str(i))
         fails.append(fail)
     toc = time.time()
     print(
         f'{int(divmod(toc - tic, 60)[0])} min {int(divmod(toc - tic, 60)[1])} sec'
     )
     return traj_mov_match
Example #16
def get_wordlist(corpus):
    word_list = []
    word_count = 0
    vocab = set()

    if corpus == 'reuters':
        corp = reuters
        file_ids = corp.fileids()
        for file_ix, f in tenumerate(file_ids, desc='articles'):
            if f.startswith('train'):
                #for word_ix, word in tenumerate(reuters.words(f), desc='words'):
                word_list = reuters.words(f)
                word_count += len(word_list)
                vocab.update(word_list)
        return vocab, word_count
    else:
        return None, None
Example #17
 def select_rows(self, segment_index=None):
     gdf_list = self.point_trajectories
     gdf_netw = self.used_network
     if segment_index is None:
         segment_index = list(np.arange(0, len(gdf_netw), 1))
     traj_eval = []
     for ind, traj in tenumerate(gdf_list):
         tr = traj.drop(['Lon', 'Lat'], axis=1)
         tr_first = tr.drop_duplicates('N1_match', keep='first')
         tr_first = tr_first.rename(columns={
             'N1_match': 'N1',
             'N2_match': 'N2'
         })
         idx_first = list(tr_first.index)
         tr_first = pd.merge(
             tr_first,
             gdf_netw[['N1', 'N2', 'Long1', 'Lat1',
                       'length']].loc[segment_index],
             how='left',
             on=['N1', 'N2'])
         tr_first = tr_first.rename(columns={'Long1': 'Lon', 'Lat1': 'Lat'})
         tr_first = tr_first.assign(index=idx_first)
         tr_last = tr.drop_duplicates('N1_match', keep='last')
         tr_last = tr_last.rename(columns={
             'N1_match': 'N1',
             'N2_match': 'N2'
         })
         idx_last = list(tr_last.index)
         tr_last = pd.merge(
             tr_last,
             gdf_netw[['N1', 'N2', 'Long2', 'Lat2',
                       'length']].loc[segment_index],
             how='left',
             on=['N1', 'N2'])
         tr_last = tr_last.rename(columns={'Long2': 'Lon', 'Lat2': 'Lat'})
         tr_last = tr_last.assign(index=idx_last)
         tr_sel = pd.concat([tr_first, tr_last])
         tr_sel = tr_sel.sort_values(by='index')
         df = traj.loc[idx_first + idx_last]
         df = df.sort_index()
         traj_eval.append([tr_sel, df])
     return traj_eval
Example #18
def evaluate_denoising(args):
    config = load_config(args.model_checkpoint, None)
    args.test_dataset = Path(args.test_dataset)

    assert config['denoising'] is True or config['black_and_white_denoising'] is True, "you are supplying a train run that has not been trained for denoising! Aborting"

    autoencoder = get_autoencoder(config).to(args.device)
    autoencoder = load_weights(autoencoder, args.model_checkpoint, key='autoencoder', strict=True)

    config['batch_size'] = 1
    data_loader = build_data_loader(args.test_dataset, config, config['absolute'], shuffle_off=True, dataset_class=DenoisingEvaluationDataset)

    metrics = defaultdict(list)
    psnr_ssim_evaluator = PSNRSSIMEvaluator()

    train_run_root_dir = Path(args.model_checkpoint).parent.parent
    evaluation_root = train_run_root_dir / 'evaluation' / f"denoise_{args.dataset_name}"
    evaluation_root.mkdir(parents=True, exist_ok=True)

    for i, batch in tenumerate(data_loader, leave=False):
        batch = {k: v.to(args.device) for k, v in batch.items()}
        with torch.no_grad():
            denoised = autoencoder(batch['noisy'])

        noisy = clamp_and_unnormalize(batch['noisy'])
        original = clamp_and_unnormalize(batch['original'])
        denoised = clamp_and_unnormalize(denoised)

        if args.save:
            save_dir = evaluation_root / "qualitative" / args.test_dataset.stem
            save_dir.mkdir(exist_ok=True, parents=True)
            save_images([original[0], noisy[0], denoised[0]], save_dir, i)

        psnr, ssim = psnr_ssim_evaluator.psnr_and_ssim(denoised, original)

        metrics['psnr'].append(float(psnr.cpu().numpy()))
        metrics['ssim'].append(float(ssim.cpu().numpy()))

    metrics = {k: statistics.mean(v) for k, v in metrics.items()}

    evaluation_file = evaluation_root / f'denoising_{args.test_dataset.stem}.json'
    with evaluation_file.open('w') as f:
        json.dump(metrics, f, indent='\t')
Example #19
 def map_matching_result_split(self):
     trajectories_moving = []
     netw = []
     nan_traj = []
     for i, j in tenumerate(self.list_mm):
         j[0]['edge'] = list(
             zip(j[0]['N1_match'].values, j[0]['N2_match'].values))
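         # Trajectories with NaN node ids in any matched edge are set aside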
         if np.any(np.isnan(np.sum(j[0]['edge'].values))):
             nan_traj.append(j[0])
             continue
         trajectories_moving.append(j[0])
         netw.append(j[1])
     used_network = pd.concat(netw, axis=0)
     used_network.drop_duplicates(subset=['N1', 'N2'], inplace=True)
     used_network.reset_index(inplace=True, drop=True)
     used_network['edge'] = [
         tuple(xy) for xy in zip(used_network['N1'], used_network['N2'])
     ]
     self.point_trajectories = trajectories_moving
     self.nan_traj = nan_traj
     self.used_network = used_network
Example #20
def parse_submission(lines):
    submissionlist = []
    for i, line in tenumerate(lines):
        try:
            jline = json.loads(line)
        except UnicodeDecodeError:
            print('Decoding error on line %i' % i)
            continue
        #cheaper than going over every key
        try:
            dummy = jline['author']
        except KeyError: 
            jline['author'] = None 

        submissionlist.append(Submission(jline['subreddit'], 
                                         jline['author'], 
                                         jline['name'], 
                                         jline['title'], 
                                         jline['url'],
                                         jline['selftext']))

    return submissionlist    
Example #21
def process_data(cur, conn, filepath, func):
    """
    Loads all files in the specified filepath and applies func to each file.
    :param cur: the database cursor.
    :param conn: the database connection.
    :param filepath: filepath to load files from.
    :param func: function to apply to each file.
    """
    # get all files matching extension from directory
    all_files = []
    for root, dirs, files in os.walk(filepath):
        files = glob.glob(os.path.join(root, "*.json"))
        for f in files:
            all_files.append(os.path.abspath(f))

    # get total number of files found
    num_files = len(all_files)
    print("{} files found in {}".format(num_files, filepath))

    # iterate over files and process
    for i, datafile in tenumerate(all_files, 1):
        func(cur, datafile)
        conn.commit()
Example #22
 def match_fixed_distance(self, list_index=None, logger=False, **kwargs):
     if logger:
         logger = lm.logger
         logger.setLevel(logging.DEBUG)
         logger.addHandler(logging.StreamHandler(sys.stdout))
     tic = time.time()
     traj_mov_match = []
     special_cases = []
     point_traj = self.list_traj
     if list_index is not None:
         point_traj = [
             j for i, j in enumerate(point_traj) if i in list_index
         ]
     for i, j in tenumerate(point_traj):
         try:
             traj = map_matching(j, self.network_edges, self.map,
                                 self.max_init, self.max_d,
                                 latlon=self.match_latlon, **kwargs)
             traj = traj.merge(self.network_edges[['_id', 'n1', 'n2']],
                               how='left', on=['n1', 'n2'])
             traj_mov_match.append(traj)
         except Exception:
             special_cases.append(j)
     toc = time.time()
     print(
         f'{int(divmod(toc - tic, 60)[0])} min {int(divmod(toc - tic, 60)[1])} sec'
     )
     return traj_mov_match, special_cases
Example #23
import torch
from datasets import load_dataset
from sklearn.metrics import roc_auc_score
from torch.utils.data import DataLoader
from tqdm.contrib import tenumerate
import numpy as np
from collections import defaultdict

from project.binary_bert.utils import load_binary_bert

dataset = load_dataset("civil_comments", split='test')
dataloader = DataLoader(dataset, batch_size=8)
model, tokenizer, class_names = load_binary_bert()
true, pred = defaultdict(list), defaultdict(list)
model.eval()
with torch.no_grad():
    for id, batch in tenumerate(dataloader, total=len(dataloader)):
        inputs = tokenizer(batch['text'],
                           return_tensors="pt",
                           truncation=True,
                           padding=True).to(model.device)
        out = model(inputs['input_ids'])
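        # Multi-label head: per-class sigmoid rather than a softmax over classes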
        scores = torch.sigmoid(out[0]).cpu().detach().numpy()
        results = {}
        for i, cla in enumerate(class_names):
            results[cla] = (
                scores[0][i] if isinstance(batch['text'], str) else
                [scores[ex_i][i].tolist() for ex_i in range(len(scores))])
            if cla == 'identity_hate':
                batch_cla = 'identity_attack'
            else:
                batch_cla = cla
Example #24
def enumerator(iterable: Iterable, verbose: bool, **kwargs):
    if not verbose:
        return enumerate(iterable)

    return tenumerate(iterable, **kwargs)
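
A quick usage sketch (the iterable and the forwarded tqdm keyword arguments are illustrative):

for i, item in enumerator(["a", "b", "c"], verbose=True, desc="items"):
    print(i, item)

With verbose=False the same call falls back to a plain enumerate, so no progress bar is drawn.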
Example #25
def test_enumerate_numpy():
    """Test contrib.tenumerate(numpy.ndarray)"""
    np = importorskip('numpy')
    with closing(StringIO()) as our_file:
        a = np.random.random((42, 7))
        assert list(tenumerate(a, file=our_file)) == list(np.ndenumerate(a))
Example #26
def test_enumerate():
    """Test contrib.tenumerate"""
    with closing(StringIO()) as our_file:
        a = range(9)
        assert list(tenumerate(a, file=our_file)) == list(enumerate(a))
        assert list(tenumerate(a, 42, file=our_file)) == list(enumerate(a, 42))
Example #27
    def test(self, merge=False, merge_size=0):
        with fluid.dygraph.guard():
            """ Test """
            """ Network """
            self.generator_ema = Generator(self.img_size, self.img_ch, self.style_dim,
                                           max_conv_dim=self.hidden_dim, sn=False, w_hpf=self.w_hpf)
            self.mapping_network_ema = MappingNetwork(self.style_dim, self.hidden_dim, self.num_domains, sn=False)
            self.style_encoder_ema = StyleEncoder(self.img_size, self.style_dim, self.num_domains,
                                                  max_conv_dim=self.hidden_dim, sn=False)
            self.fan = FAN(fname_pretrained='fan')

            """ Load model """
            self.load_model(choice='test')

            source_path = os.path.join(self.test_dataset_path, 'src_imgs')
            source_images = glob(os.path.join(source_path, '*.png')) + glob(os.path.join(source_path, '*.jpg'))
            source_images = sorted(source_images)

            # reference-guided synthesis
            print('reference-guided synthesis')
            reference_path = os.path.join(self.test_dataset_path, 'ref_imgs')
            reference_images = []
            reference_domain = []

            for idx, domain in enumerate(self.domain_list):
                image_list = glob(os.path.join(reference_path, domain) + '/*.png') + glob(
                    os.path.join(reference_path, domain) + '/*.jpg')
                image_list = sorted(image_list)
                domain_list = [[idx]] * len(image_list)  # [ [0], [0], ... , [0] ]

                reference_images.extend(image_list)
                reference_domain.extend(domain_list)

            if merge:
                src_img = None
                ref_img = None
                ref_img_domain = None

                if merge_size == 0:
                    # [len_src_imgs : len_ref_imgs] matching
                    for src_idx, src_img_path in tenumerate(source_images):
                        src_name, src_extension = os.path.splitext(src_img_path)
                        src_name = os.path.basename(src_name)

                        src_img_ = load_images(src_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]

                        src_img_ = paddle.fluid.layers.unsqueeze(src_img_, axes=[0])

                        if src_idx == 0:
                            src_img = src_img_
                        else:
                            src_img = paddle.fluid.layers.concat([src_img, src_img_], axis=0)

                    for ref_idx, (ref_img_path, ref_img_domain_) in tenumerate(zip(reference_images, reference_domain)):
                        ref_name, ref_extension = os.path.splitext(ref_img_path)
                        ref_name = os.path.basename(ref_name)

                        ref_img_ = load_images(ref_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
                        ref_img_ = paddle.fluid.layers.unsqueeze(ref_img_, axes=[0])
                        ref_img_domain_ = np.mat(ref_img_domain_)
                        ref_img_domain_ = fluid.dygraph.to_variable(np.array(ref_img_domain_))
                        if ref_idx == 0:
                            ref_img = ref_img_
                            ref_img_domain = ref_img_domain_
                        else:
                            ref_img = paddle.fluid.layers.concat([ref_img, ref_img_], axis=0)
                            ref_img_domain = paddle.fluid.layers.concat([ref_img_domain, ref_img_domain_], axis=0)
                    save_path = './{}/ref_all.jpg'.format(self.result_dir)
                    self.refer_canvas(src_img, ref_img, ref_img_domain, save_path,
                                      img_num=[len(source_images), len(reference_images)])


                else:
                    # [merge_size : merge_size] matching
                    src_size = 0
                    for src_idx, src_img_path in tenumerate(source_images):
                        src_name, src_extension = os.path.splitext(src_img_path)
                        src_name = os.path.basename(src_name)

                        src_img_ = load_images(src_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
                        src_img_ = paddle.fluid.layers.unsqueeze(src_img_, axes=[0])

                        if src_size < merge_size:
                            if src_idx % merge_size == 0:
                                src_img = src_img_
                            else:
                                src_img = paddle.fluid.layers.concat([src_img, src_img_], axis=0)
                            src_size += 1

                            if src_size == merge_size:
                                src_size = 0

                                ref_size = 0
                                for ref_idx, (ref_img_path, ref_img_domain_) in enumerate(
                                        zip(reference_images, reference_domain)):
                                    ref_name, ref_extension = os.path.splitext(ref_img_path)
                                    ref_name = os.path.basename(ref_name)

                                    ref_img_ = load_images(ref_img_path, self.img_size,
                                                           self.img_ch)  # [img_size, img_size, img_ch]
                                    ref_img_ = paddle.fluid.layers.unsqueeze(ref_img_, axes=[0])
                                    ref_img_domain_ = paddle.fluid.layers.unsqueeze(ref_img_domain_, axes=[0])

                                    if ref_size < merge_size:
                                        if ref_idx % merge_size == 0:
                                            ref_img = ref_img_
                                            ref_img_domain = ref_img_domain_
                                        else:
                                            ref_img = paddle.fluid.layers.concat([ref_img, ref_img_], axis=0)
                                            ref_img_domain = paddle.fluid.layers.concat(
                                                [ref_img_domain, ref_img_domain_],
                                                axis=0)

                                        ref_size += 1
                                        if ref_size == merge_size:
                                            ref_size = 0

                                            save_path = './{}/ref_{}_{}.jpg'.format(self.result_dir, src_idx + 1,
                                                                                    ref_idx + 1)

                                            self.refer_canvas(src_img, ref_img, ref_img_domain, save_path,
                                                              img_num=merge_size)

            else:
                # [1:1] matching
                for src_img_path in tqdm(source_images):
                    src_name, src_extension = os.path.splitext(src_img_path)
                    src_name = os.path.basename(src_name)

                    src_img = load_images(src_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
                    src_img = paddle.fluid.layers.unsqueeze(src_img, axes=[0])

                    for ref_img_path, ref_img_domain in zip(reference_images, reference_domain):
                        ref_name, ref_extension = os.path.splitext(ref_img_path)
                        ref_name = os.path.basename(ref_name)

                        ref_img = load_images(ref_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
                        ref_img = paddle.fluid.layers.unsqueeze(ref_img, axes=[0])
                        ref_img_domain = paddle.fluid.layers.unsqueeze(ref_img_domain, axes=[0])

                        save_path = './{}/ref_{}_{}{}'.format(self.result_dir, src_name, ref_name, src_extension)

                        self.refer_canvas(src_img, ref_img, ref_img_domain, save_path, img_num=1)

            # latent-guided synthesis
            print('latent-guided synthesis')
            for src_img_path in tqdm(source_images):
                src_name, src_extension = os.path.splitext(src_img_path)
                src_name = os.path.basename(src_name)

                src_img = load_images(src_img_path, self.img_size, self.img_ch)  # [img_size, img_size, img_ch]
                src_img = paddle.fluid.layers.unsqueeze(src_img, axes=[0])

                save_path = './{}/latent_{}{}'.format(self.result_dir, src_name, src_extension)

                self.latent_canvas(src_img, save_path)
Example #28
                      batch_first=True,
                      lower=True,
                      stop_words=set(string.punctuation))
    LABEL = data.Field(dtype=torch.float,
                       is_target=True,
                       unk_token=None,
                       sequential=False,
                       use_vocab=False)

    df_dataset = pd.read_csv(f'data/{dataset}/data.csv')
    entire_dataset = DataFrameDataset(df_dataset, {
        'text': TEXT,
        'label': LABEL
    })

    tokenized_input = []
    for i, example in tenumerate(entire_dataset.examples):
        words = list(example.text)
        if len(words) > 0:
            tokenized_input.append(' '.join(words))
        else:
            tokenized_input.append(None)

    df_dataset['text'] = tokenized_input
    df_dataset = df_dataset.replace(to_replace='None', value=np.nan).dropna()
    df_dataset.to_csv(f'data/{dataset}/tokenized_data.csv',
                      index=False,
                      quoting=csv.QUOTE_NONNUMERIC)
Example #29
                                                      options=self.options)
        put_writer.write(table)
        put_writer.close()

    # Request a pyarrow.Table by name
    def get_table(self, name):
        reader = self.con.do_get(flight.Ticket(name.encode('utf8')),
                                 options=self.options)
        return reader.read_all()

    def list_actions(self):
        return self.con.list_actions()

ipc_options = pa.ipc.IpcWriteOptions(compression='zstd')
options = flight.FlightCallOptions(write_options=ipc_options)
client = DemoClient(location, options=options)

dataset, files_list = get_s3_dataset("s3://molbeam/tested")

for count, table in tenumerate(dataset.to_batches(columns=["canonical_ID", "enumerated_smiles", "achiral_fp"]), total=len(files_list)):

    client.cache_table_in_server(files_list[count], table)

@stopwatch
def get_single_table_from_flight_server(target):
    table_received = client.get_table(target)
    return table_received

received_table = get_single_table_from_flight_server(files_list[0])
print(received_table)
Example #30
 def distance_point_to_matched_edge(self):
     distances = {
         'ID': [],
         'Type': [],
         'length_traj': [],
         'max_distance': [],
         'median_distance': [],
         'mean_distance': [],
         '99_percentile': [],
         'length_diff': [],
         'length_diff_rel': []
     }  #, 'frechet_distance': []}
     list_distances_traj = []
     for ind, traj in tenumerate(self.point_trajectories):
         distances['ID'].append((ind, traj['Tracked Vehicle'].values[0]))
         distances['Type'].append(traj['Type'].values[0])
         distances['length_traj'].append(len(traj))
         dist = []
         mapped_length = 0
         traj_val = traj[['Lon', 'Lat']].values
         xy_crds = converting_path_to_xy(traj)
         p_xy = xy_crds[['x', 'y']].values
         path_length = similaritymeasures.get_arc_length(p_xy)
         # print(path_length[0])
         traj_match = traj.rename(columns={
             'N1_match': 'N1',
             'N2_match': 'N2'
         })
         match_df = pd.merge(traj_match[['edge']],
                             self.used_network[[
                                 'N1', 'Lat1', 'Long1', 'N2', 'Lat2',
                                 'Long2', 'length', 'edge'
                             ]],
                             how='left',
                             on=['edge'])
         match_val = match_df[[
             'Lat1', 'Long1', 'Lat2', 'Long2', 'edge', 'length'
         ]].values
         #q_1 = [xy for xy in zip(match_df.Lat1.values, match_df.Long1)]
         #idx = [i for i in range(len(q_1)) if q_1[i] != q_1[i-1]]
         #q_1 = list(match_df[['Lat1', 'Long1']].loc[idx].values)
         #if len(q_1) < 1:  # Interpolated points have to be appended
         #    q_1 = [0]
         #q_1 = []
         for row in range(0, len(traj)):
             p = (traj_val[row][1], traj_val[row][0])  # Lat-lon order
             s1 = (match_val[row][0], match_val[row][1])
             s2 = (match_val[row][2], match_val[row][3])
             d, pi, ti = lm_dist.distance_point_to_segment(p, s1, s2)
             dist.append(d)
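             # Endpoints fall mid-edge: add only the traversed fraction ti of that edge's length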
             if row == 0:
                 mapped_length += match_val[row][5] * (1 - ti)
                 #q_1.append(pi)
             elif row == len(traj) - 1:
                 mapped_length += match_val[row][5] * ti
                 #q_1.append(pi)
             elif 0 < row and match_val[row][4] != match_val[row - 1][4]:
                 #q_1.append(pi)
                 mapped_length += match_val[row][5]
         if match_val[len(traj) - 2][4] == match_val[len(traj) - 1][4]:
             mapped_length -= match_val[len(traj) - 1][5]
         # print(mapped_length)
         diff_len = abs(path_length[0] - mapped_length)
         list_distances_traj.append(dist)
         #q_xy = converting_list_to_xy(q_1)
         #d_fr = similaritymeasures.frechet_dist(p_xy, q_xy.values)
         distances['max_distance'].append(max(dist))
         distances['median_distance'].append(np.median(dist))
         distances['mean_distance'].append(np.mean(dist))
         distances['99_percentile'].append(np.percentile(dist, 99))
         distances['length_diff'].append(diff_len)
         distances['length_diff_rel'].append(diff_len / path_length[0])
         #distances['frechet_distance'].append(d_fr)
     distances = pd.DataFrame(distances)
     return distances, list_distances_traj