def __init__(self, graph, embeddingDimension=200, walkLength=3, nbContexts=1, negativeRate=5):
    """Instantiates a Node2Vec algorithm.
    =================================
    INPUT :
    - graph : weighted graph on which the model is going to be trained <needs to be a networkx graph>
    - embeddingDimension : dimension of the continuous representation of the nodes
    - walkLength : length of the random walks used to build the contexts
    - nbContexts : number of contexts computed per node
    - negativeRate : number of negative samples for each positive sample
    """
    self.embeddingDimension = embeddingDimension
    self.negativeRate = negativeRate
    self.walkLength = walkLength
    self.nbContexts = nbContexts
    self.contexts = None
    self.original_graph = graph
    # for code simplicity, we work on a copy of the graph
    # where the names of the nodes have been replaced by an index
    # build a dictionary to translate node names into indices
    nodes = list(graph.nodes)
    self.node2index = {node: index for index, node in enumerate(nodes)}
    self.index2node = {index: node for index, node in enumerate(nodes)}
    # instantiating the new graph
    new_graph = nx.DiGraph()
    # adding nodes to the graph
    for node in prog_bar(nodes):
        new_graph.add_node(self.node2index[node])
    # adding edges to the graph
    for edge in prog_bar(graph.edges(data=True)):
        u = self.node2index[edge[0]]
        v = self.node2index[edge[1]]
        w = edge[2]['weight'] if 'weight' in edge[2].keys() else 1
        if w > 0:
            new_graph.add_edge(u, v, weight=w)
    self.graph = new_graph
    self.nb_of_nodes = len(nodes)
    # initializing the layers
    self.input2hidden_weights = np.random.uniform(low=-1, high=1, size=(self.nb_of_nodes, self.embeddingDimension))
    self.hidden2output_weights = np.random.uniform(low=-1, high=1, size=(self.nb_of_nodes, self.embeddingDimension))
    # compute a probability distribution used for negative sampling
    probabilities = np.array([max(new_graph.in_degree[i], .001) for i in range(self.nb_of_nodes)])
    probabilities = np.power(probabilities, 3 / 4)
    probabilities = probabilities / np.sum(probabilities)
    self.probabilities = probabilities
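# --- Usage sketch (not part of the original file) -----------------------------
# A minimal, hypothetical example of constructing the embedder above; the class
# name `Node2Vec` and the toy graph are assumptions for illustration only.
import networkx as nx
import numpy as np

toy_graph = nx.karate_club_graph()  # any networkx graph; unweighted edges default to weight 1
model = Node2Vec(toy_graph, embeddingDimension=64, walkLength=3, nbContexts=1, negativeRate=5)
# model.input2hidden_weights : one embedding row per node
# model.probabilities        : in_degree ** (3/4) distribution used for negative sampling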
def parsing_attributes(list_of_attributes):
    tokenized_attributes = []
    print('tokenizing attributes ...')
    for attributes in prog_bar(list_of_attributes):
        # we tokenize the attributes string
        tokenized_attributes.append(['<start>'] + tokenize(attributes))
    pad_len = len(max(tokenized_attributes, key=len))
    padded_attributes = []
    print('padding attributes ...')
    for attributes in prog_bar(tokenized_attributes):
        att_len = len(attributes)
        attributes += ['<pad>'] * (pad_len - att_len)
        padded_attributes.append(attributes[:pad_len] + ['<end>'])
    return np.array(padded_attributes)
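# --- Illustration (not part of the original file) ------------------------------
# A self-contained sketch of the same pad-then-mark scheme used above; the real
# `tokenize` helper is not shown here, so a plain whitespace split stands in.
import numpy as np

def _toy_parsing(attribute_strings, tokenize=str.split):
    tokenized = [['<start>'] + tokenize(s) for s in attribute_strings]
    pad_len = len(max(tokenized, key=len))
    return np.array([(t + ['<pad>'] * (pad_len - len(t)))[:pad_len] + ['<end>']
                     for t in tokenized])

print(_toy_parsing(['name[Vaults]', 'food[French] area[riverside]']))
# every row has the same length: '<start>', tokens, '<pad>' filler, '<end>'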
def _create_reduced_point_cloud(data_path, info_path, save_path=None, back=False):
    with open(info_path, 'rb') as f:
        kitti_infos = pickle.load(f)
    for info in prog_bar(kitti_infos):
        v_path = info['velodyne_path']
        v_path = pathlib.Path(data_path) / v_path
        points_v = np.fromfile(str(v_path), dtype=np.float32, count=-1).reshape([-1, 6])
        rect = info['calib/R0_rect']
        P2 = info['calib/P2']
        Trv2c = info['calib/Tr_velo_to_cam']
        # first remove z < 0 points
        # keep = points_v[:, -1] > 0
        # points_v = points_v[keep]
        # then remove outside.
        if back:
            points_v[:, 0] = -points_v[:, 0]
        points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2, info["img_shape"])
        if save_path is None:
            save_filename = v_path.parent.parent / (v_path.parent.stem + "_reduced") / v_path.name
            # save_filename = str(v_path) + '_reduced'
            if back:
                # convert the Path to str before appending the suffix to avoid a TypeError
                save_filename = str(save_filename) + "_back"
        else:
            save_filename = str(pathlib.Path(save_path) / v_path.name)
            if back:
                save_filename += "_back"
        with open(save_filename, 'w') as f:
            points_v.tofile(f)
def parsing_list_of_attributes(list_of_attributes):
    return_array = [parsing_attributes(att) for att in prog_bar(list_of_attributes)]
    return_array = np.array(return_array)  # .reshape(len(list_of_attributes), -1)
    return return_array
def main():
    env = GameEnv(show=False)
    agent = DQNAgent(env=env)
    no_episodes = 500
    for episode in prog_bar(range(no_episodes), ascii=False, unit="episodes"):
        state = env.reset().reshape(1, 2)
        while True:
            action = agent.predict_action(state)
            new_state, reward, done = env.action(action)
            new_state = new_state.reshape(1, 2)
            agent.remember(state, action, reward, new_state, done)
            agent.model_train()
            agent.target_train()
            state = new_state.reshape(1, 2)
            if done:
                if reward == 500:
                    print(f"Completed in episode {episode}")
                    agent.save_model(f"final-{episode}.model")
                break
def building_data_test(path):
    """Load the attribute strings from the test data file."""
    print('loading text ...')
    list_of_attributes = []
    # opening the file
    with open(path, 'r', encoding='utf-8') as file:
        for line_number, line in prog_bar(enumerate(file)):
            if line_number == 0:
                continue  # skipping the header line
            if '[' not in str(line):
                continue  # some lines are fuzzy
            attributes = str(line).split('",')[0].replace('"', '').replace('\n', '')  # cosmetic
            # extract name, near and area (placeholder values)
            if 'name' in attributes:
                name = attributes.split('name[')[1].split(']')[0]
            if 'area' in attributes:
                area = attributes.split('area[')[1].split(']')[0]
            if 'near' in attributes:
                near = attributes.split('near[')[1].split(']')[0]
            list_of_attributes.append(attributes)
    return list_of_attributes
def train(self, stepsize, epochs):
    # running through the epochs
    for epoch in range(epochs):
        print("Epoch n° : {}/{} - {}".format(epoch + 1, epochs, str(datetime.datetime.now())))
        # running through contexts
        for target_word, context in prog_bar(self.contexts):
            h = self.input2hidden_weights[target_word, :]
            # creating negative samples
            # NOTE: a word can end up being both a negative and a positive sample here
            for context_word in context:
                # generating negative samples
                training_outputs = self.create_negative_samples(context_word)
                # computing EH
                EH = np.sum([(self.sigmoid(self.hidden2output_weights[:, j], h) - tj) * self.hidden2output_weights[:, j]
                             for j, tj in training_outputs], axis=0)
                # updating output layer weights
                for j, tj in training_outputs:
                    self.hidden2output_weights[:, j] -= stepsize * (self.sigmoid(self.hidden2output_weights[:, j], h) - tj) * h.T
                # updating input layer weights
                self.input2hidden_weights[target_word, :] -= stepsize * EH.T
    print("Training ended at ", str(datetime.datetime.now()))
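# --- Assumed helpers (not shown in the original snippet) -----------------------
# train() relies on self.sigmoid(w, h) and self.create_negative_samples(word).
# The exact implementations are not part of this file; a minimal sketch of the
# logistic score the update rule seems to expect is given below as an assumption.
def sigmoid(self, w, h):
    # logistic of the inner product between an output vector and the hidden state
    return 1.0 / (1.0 + np.exp(-np.dot(w, h)))

# create_negative_samples(word) is expected to return (index, target) pairs:
# the positive context word with target 1, plus `negativeRate` words drawn from
# self.probabilities with target 0.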
def building_data(path):
    """Function to build the dictionary (token mapper) of the data."""
    list_of_reviews = []
    list_of_attributes = []
    # opening the file
    with open(path, 'r', encoding='utf-8') as file:
        for line_number, line in prog_bar(enumerate(file)):
            if line_number == 0:
                continue  # skipping the header line
            if '[' not in str(line):
                continue  # some lines are fuzzy
            attributes = str(line).split('",')[0].replace('"', '')  # cosmetic
            review = str(line).split('",')[1].replace('"', '').replace('\n', '')  # cosmetic
            # replace name, near and area by placeholders
            if 'name' in attributes:
                name = attributes.split('name[')[1].split(']')[0]
                review = review.replace(name, 'nameplaceholder')
            if 'area' in attributes:
                area = attributes.split('area[')[1].split(']')[0]
                review = review.replace(area, 'areaplaceholder')
            if 'near' in attributes:
                near = attributes.split('near[')[1].split(']')[0]
                review = review.replace(near, 'nearplaceholder')
            review = 'startofreview ' + review + ' endofreview'
            list_of_reviews.append(review.lower())
            list_of_attributes.append(attributes)
    vocabulary_full = []
    reviews_tokens = []
    for sentence in prog_bar(list_of_reviews):
        tokens = tokenize(sentence)
        vocabulary_full += tokens
        reviews_tokens.append(tokens)
    vocabulary = Counter(vocabulary_full)
    vocabulary = sorted(vocabulary, key=vocabulary.get, reverse=True)
    mapper = {token: index + 1 for index, token in prog_bar(enumerate(vocabulary))}
    mapper['<unk>'] = len(mapper) + 1
    mapper['<pad>'] = len(mapper) + 1
    return mapper, list_of_attributes, reviews_tokens
def parsing_list_of_attributes_test(list_of_attributes):
    attributes_list = []
    dictionnary_list = []
    for att in prog_bar(list_of_attributes):
        attributes_parsed, dictionnary = parsing_attributes_test(att)
        attributes_list.append(attributes_parsed)
        dictionnary_list.append(dictionnary)
    attributes_list = np.array(attributes_list)
    return attributes_list, dictionnary_list
def mapping_reviews(list_of_reviews, mapper, review_len=76):
    nb_of_samples = len(list_of_reviews)
    mapped_tokens = []
    for review in prog_bar(list_of_reviews):
        mapped_review = []
        for token in review:
            mapped_review.append(mapper.get(token, mapper.get('<unk>')) - 1)
        mapped_tokens.append(mapped_review)
    mapped_tokens = np.array(mapped_tokens, ndmin=2).reshape(nb_of_samples, review_len)
    return mapped_tokens
def padding_review(list_of_reviews, padding_length):
    """Each review is a list of tokens."""
    list_of_padded_reviews = []
    for review in prog_bar(list_of_reviews):
        review_len = len(review)
        if review_len < padding_length:
            to_pad = ['<pad>' for _ in range(padding_length - len(review))]
        else:
            to_pad = []
        padded_review = review + to_pad
        list_of_padded_reviews.append(padded_review[:padding_length])
    return np.array(list_of_padded_reviews)
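# --- Illustration (not part of the original file) ------------------------------
# A quick check of padding_review on toy input: reviews shorter than
# padding_length are right-padded with '<pad>', longer ones are truncated.
demo = padding_review([['a', 'b'], ['a', 'b', 'c', 'd', 'e']], padding_length=4)
print(demo)         # [['a' 'b' '<pad>' '<pad>'] ['a' 'b' 'c' 'd']]
print(demo.shape)   # (2, 4)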
def building_data_test(path, mapper, padding_length_att=100, limit=None):
    """Function to build and format the test data."""
    list_of_attributes = []
    # opening the file
    with open(path, 'r', encoding='utf-8') as file:
        for line_number, line in prog_bar(enumerate(file)):
            if line_number == 0:
                continue  # skipping the header line
            if '[' not in str(line):
                continue  # some lines are fuzzy
            if (limit is not None) and line_number > limit:
                break
            attributes = str(line).split('",')[0].replace('"', '')  # cosmetic
            # tokenizing the data
            attributes = tokenize(attributes)
            # appending the tokenized attributes to the output list
            list_of_attributes.append(attributes)
    pad_len_att = padding_length_att  # len(max(list_of_attributes, key=len))
    # padding attributes
    padded_attributes = []
    print('padding attributes ...')
    for attributes in prog_bar(list_of_attributes):
        att_len = len(attributes)
        attributes += ['<pad>'] * (pad_len_att - att_len)
        padded_attributes.append(['<start>'] + attributes[:pad_len_att] + ['<end>'])
    # translating attributes
    print('translating attributes ...')
    translated_attributes = []
    for att in prog_bar(padded_attributes):
        translated_att = []
        for token in att:
            translated_att.append(mapper.get(token, mapper.get('<unk>')))
        translated_attributes.append(translated_att)
    return np.array(translated_attributes)
def _calculate_num_points_in_gt(data_path, infos, relative_path, remove_outside=True, num_features=6):
    for info in prog_bar(infos):
        if relative_path:
            v_path = str(pathlib.Path(data_path) / info["velodyne_path"])
        else:
            v_path = info["velodyne_path"]
        points_v = np.fromfile(v_path, dtype=np.float32, count=-1)
        annos = info['annos']
        try:
            points_v = points_v.reshape([-1, num_features])
        except ValueError:
            # malformed point-cloud file: skip it but keep the annotation entry valid
            annos["num_points_in_gt"] = 0
            print('warning:', v_path)
            continue
        rect = info['calib/R0_rect']
        Trv2c = info['calib/Tr_velo_to_cam']
        P2 = info['calib/P2']
        if remove_outside:
            points_v = box_np_ops.remove_outside_points(points_v, rect, Trv2c, P2, info["img_shape"])
        # points_v = points_v[points_v[:, 0] > 0]
        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
        # annos = kitti.filter_kitti_anno(annos, ['DontCare'])
        dims = annos['dimensions'][:num_obj]
        loc = annos['location'][:num_obj]
        rots = annos['rotation_y'][:num_obj]
        gt_boxes_camera = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1)
        gt_boxes_lidar = box_np_ops.box_camera_to_lidar(gt_boxes_camera, rect, Trv2c)
        indices = box_np_ops.points_in_rbbox(points_v[:, :3], gt_boxes_lidar)
        num_points_in_gt = indices.sum(0)
        num_ignored = len(annos['dimensions']) - num_obj
        num_points_in_gt = np.concatenate([num_points_in_gt, -np.ones([num_ignored])])
        annos["num_points_in_gt"] = num_points_in_gt.astype(np.int32)
        'herself', 'it', 'its', 'itself', 'they', 'them', 'their', 'theirs',
        'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', 'these',
        'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
        'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a',
        'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until',
        'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against',
        'between', 'into', 'through', 'during', 'before', 'after', 'above',
        'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over',
        'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when',
        'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more',
        'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own',
        'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just',
        'don', 'should', 'now', 'a']

    # computing word count and size of the vocabulary
    print("Starting word count")
    self.word_count = {}
    for sentence in prog_bar(sentences):
        for word in sentence:
            if word in self.word_count.keys():
                self.word_count[word] += 1
            else:
                self.word_count[word] = 1
    self.word_count = pd.Series(self.word_count)
    print("Word count done")
    print("Starting word cleaning")
    # getting rid of too rare words
    self.word_count = self.word_count[self.word_count >= minCount]
    # keeping only valid words
    valid_words = [word for word in self.word_count.index if word not in stopwords]
    self.word_count = self.word_count.loc[valid_words].sort_values(ascending=False)
def create_groundtruth_database(data_path,
                                info_path=None,
                                used_classes=None,
                                database_save_path=None,
                                db_info_save_path=None,
                                relative_path=True,
                                lidar_only=False,
                                bev_only=False,
                                coors_range=None):
    root_path = pathlib.Path(data_path)
    if info_path is None:
        info_path = root_path / 'level5_infos_train.pkl'
    if database_save_path is None:
        database_save_path = root_path / 'gt_database'
    else:
        database_save_path = pathlib.Path(database_save_path)
    if db_info_save_path is None:
        db_info_save_path = root_path / "level5_dbinfos_train.pkl"
    database_save_path.mkdir(parents=True, exist_ok=True)
    with open(info_path, 'rb') as f:
        level5_infos = pickle.load(f)
    all_db_infos = {}
    if used_classes is None:
        used_classes = list(kitti.get_classes())
        used_classes.pop(used_classes.index('DontCare'))
    for name in used_classes:
        all_db_infos[name] = []
    group_counter = 0
    for info in prog_bar(level5_infos):
        velodyne_path = info['velodyne_path']
        if relative_path:
            # velodyne_path = str(root_path / velodyne_path) + "_reduced"
            velodyne_path = str(root_path / velodyne_path)
        num_features = 6
        if 'pointcloud_num_features' in info:
            num_features = info['pointcloud_num_features']
        points = np.fromfile(velodyne_path, dtype=np.float32, count=-1).reshape([-1, num_features])
        image_idx = info["image_idx"]
        rect = info['calib/R0_rect']
        P2 = info['calib/P2']
        Trv2c = info['calib/Tr_velo_to_cam']
        if not lidar_only:
            points = box_np_ops.remove_outside_points(points, rect, Trv2c, P2, info["img_shape"])
        annos = info["annos"]
        names = annos["name"]
        bboxes = annos["bbox"]
        difficulty = annos["difficulty"]
        gt_idxes = annos["index"]
        num_obj = np.sum(annos["index"] >= 0)
        rbbox_cam = kitti.anno_to_rbboxes(annos)[:num_obj]
        rbbox_lidar = box_np_ops.box_camera_to_lidar(rbbox_cam, rect, Trv2c)
        if bev_only:  # set z and h to limits
            assert coors_range is not None
            rbbox_lidar[:, 2] = coors_range[2]
            rbbox_lidar[:, 5] = coors_range[5] - coors_range[2]
        group_dict = {}
        group_ids = np.full([bboxes.shape[0]], -1, dtype=np.int64)
        if "group_ids" in annos:
            group_ids = annos["group_ids"]
        else:
            group_ids = np.arange(bboxes.shape[0], dtype=np.int64)
        point_indices = box_np_ops.points_in_rbbox(points, rbbox_lidar)
        for i in range(num_obj):
            filename = f"{image_idx}_{names[i]}_{gt_idxes[i]}.bin"
            filepath = database_save_path / filename
            gt_points = points[point_indices[:, i]]
            gt_points[:, :3] -= rbbox_lidar[i, :3]
            with open(filepath, 'w') as f:
                gt_points.tofile(f)
            if names[i] in used_classes:
                if relative_path:
                    db_path = str(database_save_path.stem + "/" + filename)
                else:
                    db_path = str(filepath)
                db_info = {
                    "name": names[i],
                    "path": db_path,
                    "image_idx": image_idx,
                    "gt_idx": gt_idxes[i],
                    "box3d_lidar": rbbox_lidar[i],
                    "num_points_in_gt": gt_points.shape[0],
                    "difficulty": difficulty[i],
                    # "group_id": -1,
                    # "bbox": bboxes[i],
                }
                local_group_id = group_ids[i]
                # if local_group_id >= 0:
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info["group_id"] = group_dict[local_group_id]
                if "score" in annos:
                    db_info["score"] = annos["score"][i]
                all_db_infos[names[i]].append(db_info)
    for k, v in all_db_infos.items():
        print(f"load {len(v)} {k} database infos")
    with open(db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)
def train(config_path,
          model_dir,
          result_path=None,
          ckpt_path=None,
          create_folder=False,
          display_step=50,
          summary_step=5,
          pickle_result=True):
    """Train a VoxelNet model specified by a config file."""
    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)
    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    eval_checkpoint_dir = model_dir / 'eval_checkpoints'
    eval_checkpoint_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    class_names = list(input_cfg.class_names)
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    ######################
    # BUILD TARGET ASSIGNER
    ######################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg, bv_range, box_coder)
    ######################
    # BUILD NET
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    # net_train = torch.nn.DataParallel(net).cuda()
    print("num_trainable parameters:", len(list(net.parameters())))
    # for n, p in net.named_parameters():
    #     print(n, p.shape)
    ######################
    # BUILD OPTIMIZER
    ######################
    # we need global_step to create lr_scheduler, so restore net first.
    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)
    gstep = net.get_global_step() - 1
    optimizer_cfg = train_cfg.optimizer
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    optimizer = optimizer_builder.build(optimizer_cfg, net.parameters())
    if train_cfg.enable_mixed_precision:
        loss_scale = train_cfg.loss_scale_factor
        mixed_optimizer = torchplus.train.MixedPrecisionWrapper(optimizer, loss_scale)
    else:
        mixed_optimizer = optimizer
    # must restore optimizer AFTER using MixedPrecisionWrapper
    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [mixed_optimizer])
    else:
        torchplus.train.restore(ckpt_path, mixed_optimizer)
    lr_scheduler = lr_scheduler_builder.build(optimizer_cfg, optimizer, gstep)
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32
    ######################
    # PREPARE INPUT
    ######################
    dataset = input_reader_builder.build(input_cfg,
                                         model_cfg,
                                         training=True,
                                         voxel_generator=voxel_generator,
                                         target_assigner=target_assigner)
    eval_dataset = input_reader_builder.build(eval_input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)

    def _worker_init_fn(worker_id):
        time_seed = np.array(time.time(), dtype=np.int32)
        np.random.seed(time_seed + worker_id)
        print(f"WORKER {worker_id} seed:", np.random.get_state()[1][0])

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=input_cfg.batch_size,
                                             shuffle=True,
                                             num_workers=input_cfg.num_workers,
                                             pin_memory=False,
                                             collate_fn=merge_second_batch,
                                             worker_init_fn=_worker_init_fn)
    eval_dataloader = torch.utils.data.DataLoader(eval_dataset,
                                                  batch_size=eval_input_cfg.batch_size,
                                                  shuffle=False,
                                                  num_workers=eval_input_cfg.num_workers,
                                                  pin_memory=False,
                                                  collate_fn=merge_second_batch)
    data_iter = iter(dataloader)
    ######################
    # TRAINING
    ######################
    log_path = model_dir / 'log.txt'
    logf = open(log_path, 'a')
    logf.write(proto_str)
    logf.write("\n")
    summary_dir = model_dir / 'summary'
    summary_dir.mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(str(summary_dir))
    total_step_elapsed = 0
    remain_steps = train_cfg.steps - net.get_global_step()
    t = time.time()
    ckpt_start_time = t
    total_loop = train_cfg.steps // train_cfg.steps_per_eval + 1
    # total_loop = remain_steps // train_cfg.steps_per_eval + 1
    clear_metrics_every_epoch = train_cfg.clear_metrics_every_epoch
    if train_cfg.steps % train_cfg.steps_per_eval == 0:
        total_loop -= 1
    mixed_optimizer.zero_grad()
    try:
        for _ in range(total_loop):
            if total_step_elapsed + train_cfg.steps_per_eval > train_cfg.steps:
                steps = train_cfg.steps % train_cfg.steps_per_eval
            else:
                steps = train_cfg.steps_per_eval
            for step in range(steps):
                lr_scheduler.step()
                try:
                    example = next(data_iter)
                except StopIteration:
                    print("end epoch")
                    if clear_metrics_every_epoch:
                        net.clear_metrics()
                    data_iter = iter(dataloader)
                    example = next(data_iter)
                example_torch = example_convert_to_torch(example, float_dtype)
                batch_size = example["anchors"].shape[0]
                ret_dict = net(example_torch)
                # box_preds = ret_dict["box_preds"]
                cls_preds = ret_dict["cls_preds"]
                loss = ret_dict["loss"].mean()
                cls_loss_reduced = ret_dict["cls_loss_reduced"].mean()
                loc_loss_reduced = ret_dict["loc_loss_reduced"].mean()
                cls_pos_loss = ret_dict["cls_pos_loss"]
                cls_neg_loss = ret_dict["cls_neg_loss"]
                loc_loss = ret_dict["loc_loss"]
                cls_loss = ret_dict["cls_loss"]
                dir_loss_reduced = ret_dict["dir_loss_reduced"]
                cared = ret_dict["cared"]
                labels = example_torch["labels"]
                if train_cfg.enable_mixed_precision:
                    loss *= loss_scale
                loss.backward()
                torch.nn.utils.clip_grad_norm_(net.parameters(), 10.0)
                mixed_optimizer.step()
                mixed_optimizer.zero_grad()
                net.update_global_step()
                net_metrics = net.update_metrics(cls_loss_reduced,
                                                 loc_loss_reduced, cls_preds,
                                                 labels, cared)
                step_time = (time.time() - t)
                t = time.time()
                metrics = {}
                num_pos = int((labels > 0)[0].float().sum().cpu().numpy())
                num_neg = int((labels == 0)[0].float().sum().cpu().numpy())
                if 'anchors_mask' not in example_torch:
                    num_anchors = example_torch['anchors'].shape[1]
                else:
                    num_anchors = int(example_torch['anchors_mask'][0].sum())
                global_step = net.get_global_step()
                if global_step % display_step == 0:
                    loc_loss_elem = [
                        float(loc_loss[:, :, i].sum().detach().cpu().numpy() / batch_size)
                        for i in range(loc_loss.shape[-1])
                    ]
                    metrics["step"] = global_step
                    metrics["steptime"] = step_time
                    metrics.update(net_metrics)
                    metrics["loss"] = {}
                    metrics["loss"]["loc_elem"] = loc_loss_elem
                    metrics["loss"]["cls_pos_rt"] = float(cls_pos_loss.detach().cpu().numpy())
                    metrics["loss"]["cls_neg_rt"] = float(cls_neg_loss.detach().cpu().numpy())
                    # if unlabeled_training:
                    #     metrics["loss"]["diff_rt"] = float(
                    #         diff_loc_loss_reduced.detach().cpu().numpy())
                    if model_cfg.use_direction_classifier:
                        metrics["loss"]["dir_rt"] = float(dir_loss_reduced.detach().cpu().numpy())
                    metrics["num_vox"] = int(example_torch["voxels"].shape[0])
                    metrics["num_pos"] = int(num_pos)
                    metrics["num_neg"] = int(num_neg)
                    metrics["num_anchors"] = int(num_anchors)
                    metrics["lr"] = float(mixed_optimizer.param_groups[0]['lr'])
                    metrics["image_idx"] = example['image_idx'][0]
                    flatted_metrics = flat_nested_json_dict(metrics)
                    flatted_summarys = flat_nested_json_dict(metrics, "/")
                    for k, v in flatted_summarys.items():
                        if isinstance(v, (list, tuple)):
                            v = {str(i): e for i, e in enumerate(v)}
                            writer.add_scalars(k, v, global_step)
                        else:
                            writer.add_scalar(k, v, global_step)
                    metrics_str_list = []
                    for k, v in flatted_metrics.items():
                        if isinstance(v, float):
                            metrics_str_list.append(f"{k}={v:.3}")
                        elif isinstance(v, (list, tuple)):
                            if v and isinstance(v[0], float):
                                v_str = ', '.join([f"{e:.3}" for e in v])
                                metrics_str_list.append(f"{k}=[{v_str}]")
                            else:
                                metrics_str_list.append(f"{k}={v}")
                        else:
                            metrics_str_list.append(f"{k}={v}")
                    log_str = ', '.join(metrics_str_list)
                    print(log_str, file=logf)
                    print(log_str)
                ckpt_elasped_time = time.time() - ckpt_start_time
                if ckpt_elasped_time > train_cfg.save_checkpoints_secs:
                    torchplus.train.save_models(model_dir, [net, optimizer],
                                                net.get_global_step())
                    ckpt_start_time = time.time()
            total_step_elapsed += steps
            torchplus.train.save_models(model_dir, [net, optimizer],
                                        net.get_global_step())
            # Ensure that all evaluation points are saved forever
            torchplus.train.save_models(eval_checkpoint_dir, [net, optimizer],
                                        net.get_global_step(), max_to_keep=100)
            net.eval()
            result_path_step = result_path / f"step_{net.get_global_step()}"
            result_path_step.mkdir(parents=True, exist_ok=True)
            print("#################################")
            print("#################################", file=logf)
            print("# EVAL")
            print("# EVAL", file=logf)
            print("#################################")
            print("#################################", file=logf)
            print("Generate output labels...")
            print("Generate output labels...", file=logf)
            t = time.time()
            dt_annos = []
            # prog_bar = ProgressBar()
            # prog_bar.start(len(eval_dataset) // eval_input_cfg.batch_size + 1)
            for example in prog_bar(iter(eval_dataloader)):
                example = example_convert_to_torch(example, float_dtype)
                if pickle_result:
                    dt_annos += predict_kitti_to_anno(net, example, class_names,
                                                      center_limit_range,
                                                      model_cfg.lidar_input)
                else:
                    _predict_kitti_to_file(net, example, result_path_step,
                                           class_names, center_limit_range,
                                           model_cfg.lidar_input)
                # prog_bar.print_bar()
            sec_per_ex = len(eval_dataset) / (time.time() - t)
            print(f"avg forward time per example: {net.avg_forward_time:.3f}")
            print(f"avg postprocess time per example: {net.avg_postprocess_time:.3f}")
            net.clear_time_metrics()
            print(f'generate label finished({sec_per_ex:.2f}/s). start eval:')
            print(f'generate label finished({sec_per_ex:.2f}/s). start eval:', file=logf)
            gt_annos = [info["annos"] for info in eval_dataset.dataset.kitti_infos]
            if not pickle_result:
                dt_annos = kitti.get_label_annos(result_path_step)
            result, mAPbbox, mAPbev, mAP3d, mAPaos = get_official_eval_result(
                gt_annos, dt_annos, class_names, return_data=True)
            print(result, file=logf)
            print(result)
            writer.add_text('eval_result', result, global_step)
            for i, class_name in enumerate(class_names):
                writer.add_scalar('bev_ap:{}'.format(class_name), mAPbev[i, 1, 0], global_step)
                writer.add_scalar('3d_ap:{}'.format(class_name), mAP3d[i, 1, 0], global_step)
                writer.add_scalar('aos_ap:{}'.format(class_name), mAPaos[i, 1, 0], global_step)
            writer.add_scalar('bev_map', np.mean(mAPbev[:, 1, 0]), global_step)
            writer.add_scalar('3d_map', np.mean(mAP3d[:, 1, 0]), global_step)
            writer.add_scalar('aos_map', np.mean(mAPaos[:, 1, 0]), global_step)
            result = get_coco_eval_result(gt_annos, dt_annos, class_names)
            print(result, file=logf)
            print(result)
            if pickle_result:
                with open(result_path_step / "result.pkl", 'wb') as f:
                    pickle.dump(dt_annos, f)
            writer.add_text('eval_result', result, global_step)
            net.train()
    except Exception as e:
        torchplus.train.save_models(model_dir, [net, optimizer],
                                    net.get_global_step())
        logf.close()
        raise e
    # save model before exit
    torchplus.train.save_models(model_dir, [net, optimizer],
                                net.get_global_step())
    logf.close()
                                               reverse_mapper)
        review_pred = create_review(tokens[0])
        review_act = generate_list_of_tokens(real_review, reverse_mapper)
        review_act = create_review(review_act[0])
        print('actual : ' + review_act)
        print('predicted : ' + review_pred)
        predictions += [model.predict(attributes[(i + 1) * 100:(i + 2) * 100, :, :])]
    rouge_scores_1 = []
    rouge_scores_2 = []
    rouge_scores_3 = []
    bleu_score = []
    for index_batch, predictions_batch in prog_bar(enumerate(predictions)):
        for index, predicted_review in enumerate(predictions_batch):
            predicted_review_tokens = np.argmax(predicted_review, axis=-1)
            actual_review = reviews[index_batch * 100 + index]
            rouge_scores_1.append(compute_rouge_score(actual_review, predicted_review_tokens, n=1))
            rouge_scores_2.append(compute_rouge_score(actual_review, predicted_review_tokens, n=2))
            rouge_scores_3.append(compute_rouge_score(actual_review, predicted_review_tokens, n=3))
def evaluate(config_path,
             model_dir,
             result_path=None,
             predict_test=False,
             ckpt_path=None,
             ref_detfile=None,
             pickle_result=True,
             angle_deg=0.0):
    model_dir = pathlib.Path(model_dir)
    if predict_test:
        result_name = 'predict_test'
    else:
        result_name = 'eval_results'
    if result_path is None:
        result_path = model_dir / result_name
    else:
        result_path = pathlib.Path(result_path)
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    class_names = list(input_cfg.class_names)
    center_limit_range = model_cfg.post_center_limit_range
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg, bv_range, box_coder)
    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)
    eval_dataset = input_reader_builder.build(input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner,
                                              angle_deg=angle_deg)
    eval_dataloader = torch.utils.data.DataLoader(eval_dataset,
                                                  batch_size=input_cfg.batch_size,
                                                  shuffle=False,
                                                  num_workers=input_cfg.num_workers,
                                                  pin_memory=False,
                                                  collate_fn=merge_second_batch)
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32
    net.eval()
    result_path_step = result_path / f"step_{net.get_global_step()}"
    result_path_step.mkdir(parents=True, exist_ok=True)
    t = time.time()
    dt_annos = []
    global_set = None
    print("Generate output labels...")
    # bar = ProgressBar()
    # bar.start(len(eval_dataset) // input_cfg.batch_size + 1)
    for example in prog_bar(iter(eval_dataloader)):
        example = example_convert_to_torch(example, float_dtype)
        if pickle_result:
            dt_annos += predict_kitti_to_anno(net, example, class_names,
                                              center_limit_range,
                                              model_cfg.lidar_input, global_set)
        else:
            _predict_kitti_to_file(net, example, result_path_step, class_names,
                                   center_limit_range, model_cfg.lidar_input)
        # bar.print_bar()
    sec_per_example = len(eval_dataset) / (time.time() - t)
    print(f'generate label finished({sec_per_example:.2f}/s). start eval:')
    print(f"avg forward time per example: {net.avg_forward_time:.3f}")
    print(f"avg postprocess time per example: {net.avg_postprocess_time:.3f}")
    if not predict_test:
        gt_annos = [info["annos"] for info in eval_dataset.dataset.kitti_infos]
        if not pickle_result:
            dt_annos = kitti.get_label_annos(result_path_step)
        # result = get_official_eval_result(gt_annos, dt_annos, class_names)
        # print(result)
        # result = get_coco_eval_result(gt_annos, dt_annos, class_names)
        # print(result)
    if pickle_result:
        with open(result_path_step / ("result_%03d.pkl" % angle_deg), 'wb') as f:
            pickle.dump(dt_annos, f)
def building_data_train(path, limit=None):
    """Function to build the token dictionary and the padded, encoded training arrays."""
    list_of_reviews = []
    list_of_attributes = []
    # opening the file
    with open(path, 'r', encoding='utf-8') as file:
        for line_number, line in prog_bar(enumerate(file)):
            if line_number == 0:
                continue  # skipping the header line
            if '[' not in str(line):
                continue  # some lines are fuzzy
            if (limit is not None) and line_number > limit:
                break
            attributes = str(line).split('",')[0].replace('"', '')  # cosmetic
            review = str(line).split('",')[1].replace('"', '').replace('\n', '')  # cosmetic
            # tokenizing the data
            review = tokenize(review)
            attributes = tokenize(attributes)
            # appending the tokenized reviews and attributes to the output lists
            list_of_reviews.append(review)
            list_of_attributes.append(attributes)
    pad_len_rev = len(max(list_of_reviews, key=len))
    pad_len_att = len(max(list_of_attributes, key=len))
    # padding attributes
    padded_attributes = []
    print('padding attributes ...')
    for attributes in prog_bar(list_of_attributes):
        att_len = len(attributes)
        attributes += ['<pad>'] * (pad_len_att - att_len)
        padded_attributes.append(['<start>'] + attributes[:pad_len_att] + ['<end>'])
    # padding the reviews
    padded_reviews = []
    print('padding reviews ...')
    for review in prog_bar(list_of_reviews):
        rev_len = len(review)
        review += ['<pad>'] * (pad_len_rev - rev_len)
        padded_reviews.append(['<start>'] + review[:pad_len_rev] + ['<end>'])
    # building vocabularies
    vocabulary_attributes = set([token for att in padded_attributes for token in att])
    vocabulary_reviews = set([token for review in padded_reviews for token in review])
    mapper = vocabulary_reviews.union(vocabulary_attributes)
    mapper = {token: index for index, token in enumerate(mapper)}
    # translating attributes
    print('translating attributes ...')
    translated_attributes = []
    for att in prog_bar(padded_attributes):
        translated_att = []
        for token in att:
            translated_att.append(mapper.get(token))
        translated_attributes.append(translated_att)
    # translating reviews
    print('translating reviews ...')
    translated_reviews = []
    for review in prog_bar(padded_reviews):
        translated_rev = []
        for token in review:
            translated_rev += [mapper[token]]
        translated_reviews.append(translated_rev)
    target_reviews = [[mapper.get('<pad>')] + translated_rev for translated_rev in translated_reviews]
    source_reviews = [translated_rev + [mapper.get('<pad>')] for translated_rev in translated_reviews]
    return mapper, np.array(translated_attributes), np.array(source_reviews), np.array(target_reviews)
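# --- Usage sketch (not part of the original file) ------------------------------
# Hypothetical call on an E2E-style CSV; the path and the `limit` value are
# placeholders, not taken from the original project.
mapper, attributes, source_reviews, target_reviews = building_data_train('data/trainset.csv', limit=1000)
print(len(mapper))            # vocabulary size (tokens shared by reviews and attributes)
print(attributes.shape)       # (n_samples, max_attribute_length + 2) including <start>/<end>
print(source_reviews.shape)   # reviews with <pad> appended, used as decoder input
print(target_reviews.shape)   # reviews with <pad> prepended, used as decoder target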
          'r') as file2:
    dec = model_from_json(str(json.load(file2)))
enc.load_weights(os.path.join(PATH_TO_MODEL, 'enc_weights.h5'))
dec.load_weights(os.path.join(PATH_TO_MODEL, 'dec_weights.h5'))
print("####### ENCODER #######")
enc.summary()
print("####### DECODER #######")
dec.summary()
print('predicting the reviews ...')
predictions = [
    make_prediction(att,
                    encoder=enc,
                    decoder=dec,
                    mapper=mapper,
                    review_length=100,
                    vocabulary_size=len(mapper))
    for att in prog_bar(attributes_test)
]
print(predictions)
print('cleaning the reviews ...')
reviews = generate_list_of_tokens(predictions, reverse_mapper)
reviews = [create_review(rev) for rev in reviews]
print(reviews)
with open(PATH_TO_OUTPUT, 'w') as output_file:
    for rev in reviews:
        output_file.write(rev + '\n')