def walk_dict(parent, jdict, size_dict=None, method_dict=None, root_name=None):
    """Recursively walk a nested dict that mirrors a directory tree, printing
    (and optionally collecting) the size and method count of each entry."""
    for d, x in jdict.items():
        path = os.path.join(parent, d)
        size = utils.get_path_size(path)
        key = d
        if root_name is not None:
            key = utils.get_folder_name(path, root_name)
            # print("key:" + key)
        if size_dict is not None and isinstance(size_dict, dict):
            size_dict[key] = size
        count = None
        if method_dict is not None:
            count = get_method_counts_in_file(path)
            if count is not None:
                # print("d:" + d + " count:" + str(count))
                method_dict[key] = count
        method_count = "" if count is None else "method:" + str(count)
        print("path:%-30s | size: %-12s | %-17s" % (
            key, utils.get_size_in_nice_string(size), method_count))
        if isinstance(x, dict):
            walk_dict(path, x, size_dict, method_dict, root_name)
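
# Usage sketch (assumption: `apk_tree` is a nested dict mirroring an unpacked
# APK at /tmp/apk_out, as produced elsewhere in this repo; the tree below is
# purely illustrative):
sizes, methods = {}, {}
apk_tree = {"classes.dex": {}, "res": {"drawable-hdpi": {}}}
walk_dict("/tmp/apk_out", apk_tree, size_dict=sizes, method_dict=methods,
          root_name="/tmp/apk_out")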
def check_apk_alpha(apk, apk_dir, ignore9, value=255):
    """Scan an unpacked APK for PNGs whose alpha channel carries no information."""
    count = 0
    # os.walk yields (parent directory, sub-directory names, file names) per folder
    for parent, dir_names, filenames in os.walk(apk_dir):
        for filename in filenames:
            # absolute path of the current file
            path = os.path.join(parent, filename)
            # keep only the image formats we care about
            if os.path.splitext(filename)[1] not in ALPHA_IMAGE_FORMAT:
                continue
            # guard against empty names (defensive; os.walk should not yield these)
            if not filename:
                continue
            # skip nine-patch (.9) images if requested
            if ignore9 and ".9" in filename:
                continue
            # check the effective image mode
            mode = check_img_mode(path, value)
            if mode == 'RGB':
                image_path = utils.get_folder_name(parent, apk_dir) + os.sep + filename
                print('IMAGE:' + image_path)
                count += 1
    if count > 0:
        print('These %s image(s) may be PNGs with an unused alpha channel; '
              'consider converting them to JPEG.' % count)
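
# check_img_mode is referenced above but not shown; a minimal sketch of what it
# might look like with Pillow (an assumption, not the repo's implementation):
# report 'RGB' when an RGBA image's alpha never drops below `value`, i.e. the
# alpha channel carries no information and JPEG would be a fair replacement.
from PIL import Image

def check_img_mode(path, value=255):
    img = Image.open(path)
    if img.mode != 'RGBA':
        return img.mode
    alpha_min, _ = img.getchannel('A').getextrema()
    return 'RGB' if alpha_min >= value else 'RGBA'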
def randomize_file_names():
    folder_name = utils.get_folder_name()
    for file in os.listdir(folder_name):
        path_to_file = os.path.join(folder_name, file)
        if os.path.isfile(path_to_file):
            # draw a fresh name per file; reusing a single name would make
            # renames with the same extension overwrite each other
            random_name = utils.get_random_name()
            file_ext = os.path.splitext(file)[1]
            os.rename(path_to_file, os.path.join(folder_name, random_name + file_ext))
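
# utils.get_random_name is not shown; one plausible implementation, assuming a
# uuid-style name is acceptable (hypothetical, not the repo's helper):
import uuid

def get_random_name():
    # 32 hex chars; collisions are effectively impossible for a folder of files
    return uuid.uuid4().hex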
def check_image_limit(apk, apk_dir, limit=40000):
    """Report images in an unpacked APK larger than `limit` bytes."""
    count = 0
    for parent, dir_names, filenames in os.walk(apk_dir):
        for filename in filenames:
            # absolute path of the current file
            path = os.path.join(parent, filename)
            # keep only the image formats we care about
            if os.path.splitext(filename)[1] not in LIMIT_IMAGE_FORMAT:
                continue
            # guard against empty names (defensive; os.walk should not yield these)
            if not filename:
                continue
            file_size = utils.get_path_size(path)
            # Python 3: plain int replaces the removed long type
            if int(file_size) > int(limit):
                image_path = utils.get_folder_name(parent, apk_dir) + os.sep + filename
                print('IMAGE:%s size:%s' % (image_path,
                                            utils.get_size_in_nice_string(file_size)))
                count += 1
    if count > 0:
        print("These files may be too large (larger than %s)." %
              utils.get_size_in_nice_string(int(limit)))
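
# utils.get_size_in_nice_string is used above but not shown; a minimal sketch
# of a human-readable size formatter (assumption; the repo's helper may differ):
def get_size_in_nice_string(size_in_bytes):
    size = float(size_in_bytes)
    for unit in ('bytes', 'KB', 'MB'):
        if size < 1024:
            return '%.1f %s' % (size, unit)
        size /= 1024
    return '%.1f GB' % size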
def use_clearml(taskid=None):
    """Set up the ClearML connection.

    Args:
        taskid: id of an experiment to be resumed; by default a new experiment
            is created.

    Returns:
        clearml_logger, task object.
    """
    if cfg['checkpoint']['use_saved']:
        cfg['checkpoint']['saved_path'] = cfg['checkpoint']['run_name']
        task = Task.init(continue_last_task=True, reuse_last_task_id=taskid)
        task.set_initial_iteration(0)
        # task = Task.get_task(task_id='4f8b87a1e1684be9a8e34ede211d3233')
        #                      project_name='ariel-mde', task_name=get_folder_name())
    else:
        task = Task.init(project_name='ariel-mde', task_name=get_folder_name())
    config_file = task.connect_configuration(Path('configs.yml'), 'experiment_config')
    task_cfg = task.connect(cfg)  # enable configuration override from clearml
    set_cfg(task_cfg)
    clearml_logger = task.get_logger()
    return clearml_logger, task
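
# Usage sketch (assumes the global cfg and configs.yml are set up as above):
clearml_logger, task = use_clearml()
clearml_logger.report_scalar(title='loss', series='train', value=0.42, iteration=0)
task.close()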
code_declaration = data[0]
chapter_label = data[1]
heading_label = data[2]
sub_heading_label = data[3]
country_extension_label = data[4]

# one-hot label widths give the number of classes per level of the hierarchy
n_chapter_classes = chapter_label.shape[1]
n_heading_classes = heading_label.shape[1]
n_sub_heading_classes = sub_heading_label.shape[1]
n_country_extension_classes = country_extension_label.shape[1]

model = HierarchicalModel(n_chapter_classes, n_heading_classes,
                          n_sub_heading_classes, n_country_extension_classes)

weight_folder_dir = get_folder_name('model_weights/model_{}')
os.mkdir(weight_folder_dir)
weight_path = weight_folder_dir + "/model_weight.ckpt"
model_parameters = model.get_parameters()
with open(os.path.join(weight_folder_dir, 'model_parameters.json'), 'w') as outfile:
    json.dump(model_parameters, outfile)

checkpoint = ModelCheckpoint(weight_path, save_weights_only=True, monitor='loss',
                             verbose=2, save_best_only=True, mode='min')
optimizer = Adam(args.learning_rate)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
early_stop = EarlyStopping(monitor='val_loss', mode='min', patience=8, verbose=2,
                           restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=2,
                              min_lr=1e-5)
print("Training model with batch size {} and {} epochs".format(args.batch, args.epochs))
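
# A hedged sketch of how the callbacks above would typically be wired into
# training; the multi-output target list and validation_split are illustrative,
# not necessarily the repo's actual call:
history = model.fit(
    code_declaration,
    [chapter_label, heading_label, sub_heading_label, country_extension_label],
    batch_size=args.batch,
    epochs=args.epochs,
    validation_split=0.1,  # needed so val_loss exists for early_stop/reduce_lr
    callbacks=[checkpoint, early_stop, reduce_lr])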
def train():
    """Main train loop; all configuration is taken from the configs.yml file.

    Returns:
        None. Saves checkpoints of the net as it trains (also uploaded by clearml).
    """
    logger.info('getting params, dataloaders, etc...')
    cfg_train = cfg['train']
    cfg_checkpoint = cfg['checkpoint']
    cfg_validation = cfg['validation']
    epochs = cfg_train['epochs']
    print_every = cfg_train['print_every']
    save_every = cfg_checkpoint['save_every']
    folder_name = get_folder_name()
    writer = SummaryWriter(os.path.join('runs', folder_name))

    loaders = get_loaders()
    train_loader, val_loader = None, None
    len_loaders = len(loaders)
    if len_loaders == 4:
        train_loader, val_loader, test_loader, depth_postprocessing = loaders
    elif len_loaders == 3:
        train_loader, val_loader, depth_postprocessing = loaders
    elif len_loaders == 2:
        train_loader, val_loader = loaders
        depth_postprocessing = None
    elif len_loaders == 1:
        train_loader = loaders[0]
        depth_postprocessing = None
    assert train_loader is not None and (
        val_loader is not None or not cfg_validation['val_round']), "problem with loader."
    n_batches = len(train_loader)

    cfg_model = cfg['model']
    cfg_optim = cfg['optim']
    if cfg_checkpoint['use_saved']:
        net, optimizer, epoch_start, running_loss = load_checkpoint()
        criterion = get_loss_function()
        epoch_start = epoch_start + 1  # we stopped at the last epoch, continue from the next
    else:
        criterion, net, optimizer = get_net()
        running_loss = 0.0
        epoch_start = 0
    if cfg_optim['use_lr_scheduler']:
        old_lr = optimizer.param_groups[0]['lr']
        scheduler = ReduceLROnPlateau(optimizer, mode='min')

    logger.info('got all params, starting train loop')
    for epoch in range(epoch_start, epochs):  # loop over the dataset multiple times
        net.train()
        with tqdm(total=n_batches, desc=f'Epoch {epoch}/{epochs}', unit='batch') as pbar:
            for data in train_loader:
                # get the inputs; data is a dict with 'image' and 'depth' entries
                img, gt_depth = data['image'], data['depth']
                if cfg['dataset']['use_mask'] and not cfg['dataset']['add_mask_to_image']:
                    assert 'mask' in data, 'no mask but required mask'
                    mask = data['mask']
                else:
                    mask = None
                loss, pred_depth = step(criterion, img, gt_depth, net, optimizer, mask)
                loss_value = loss.item()
                # NaN is the only value that compares unequal to itself
                assert loss_value == loss_value, 'loss is nan!'
                pbar.set_postfix(**{'loss (batch)': loss_value})
                running_loss += loss_value
                pbar.update()

        if cfg_optim['use_lr_scheduler']:
            val_score, val_sample = eval_net(net, val_loader)
            scheduler.step(val_score)  # possibly plateau LR
            new_lr = optimizer.param_groups[0]['lr']
            if old_lr != new_lr:
                print(f'old lr: {old_lr}, new lr: {new_lr}')
                old_lr = new_lr

        if epoch % print_every == print_every - 1:
            if not cfg_optim['use_lr_scheduler']:
                if cfg_validation['val_round']:
                    assert cfg_validation['val_percent'] is not None, \
                        "required val_round but didn't give a split size"
                    val_score, val_sample = eval_net(net, val_loader)
                else:
                    val_score = None
                    val_sample = None
            train_loss = running_loss / (print_every * n_batches)
            # TODO: see how to save og image for printing w.o doing it for every batch.
            train_sample = {**data, 'pred': pred_depth}
            if cfg['validation']['hist']:
                viz_net = net
            else:
                viz_net = None
            if depth_postprocessing:
                logger.info('post-processing prediction and depth.')
                train_sample = depth_postprocessing(train_sample)
                if cfg_validation['val_round']:
                    val_sample = depth_postprocessing(val_sample)
            print_stats(train_sample, val_sample, train_loss, val_score, epoch,
                        writer, viz_net)
            running_loss = 0.0
        if save_every is not None and (epoch % save_every == save_every - 1):
            save_checkpoint(epoch, net, optimizer, running_loss)

    print('Finished Training')
    writer.close()
    # TODO: graceful death - checkpoint when exiting run as well.
    if save_every is not None:
        save_checkpoint(epochs - 1, net, optimizer, 0)
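
# step() is called inside the batch loop above but not shown; a hedged sketch
# of one training step in PyTorch (an assumption about its contract, not the
# repo's implementation):
def step(criterion, img, gt_depth, net, optimizer, mask=None):
    optimizer.zero_grad()
    pred_depth = net(img)
    if mask is not None:
        # restrict the loss to valid pixels only
        loss = criterion(pred_depth[mask], gt_depth[mask])
    else:
        loss = criterion(pred_depth, gt_depth)
    loss.backward()
    optimizer.step()
    return loss, pred_depth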
        num_ast[num_diffs] += 1
    stat["num_files"] = num_files
    stat["num_push"] = num_push
    stat["num_ast"] = num_ast
    return stat


master_dict = {}
master_results = []

pool = mp.Pool(30)
_dirs = list(utils.data_itr())
ast_diffs = [os.path.join(_dir[4], utils.get_folder_name(_dir) + "_master_bug_metadata.json")
             for _dir in _dirs]
pool.map_async(get_num_downloaded, _dirs, callback=collect_result)
pool.close()
pool.join()

stats = [{"num_downloaded": num_downloaded[i], "dir": _dirs[i][4], "fname": ast_diffs[i]}
         for i in range(len(num_downloaded))]
pool = mp.Pool(30)
results = pool.map(get_stats, stats)
master_results += results
pool.close()
pool.join()
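
# collect_result and num_downloaded are referenced above but defined elsewhere;
# a minimal sketch of how they might look earlier in the module, following the
# usual map_async callback pattern (assumption, not the repo's code):
num_downloaded = []

def collect_result(results):
    # map_async hands the callback the whole list of results at once
    num_downloaded.extend(results)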
        files.append((os.path.join(path, _id, diff), prefix + diff))
        js_file = utils.get_source(path, _id, f["buggy_file"])
        files.append((js_file, folder_name + "_" + _id + "_" +
                      js_file.replace(path + "/" + _id + "/", "")))
    return files


# create the rsync list file on first run
if not os.path.exists(rsync_fname):
    open(rsync_fname, "w").close()
with open(rsync_fname, "r") as f:
    content = f.read()

for _dir_tup in tqdm(utils.data_itr()):
    folder_name = utils.get_folder_name(_dir_tup)
    _dir = _dir_tup[4]
    ast_diffs = os.path.join(_dir, folder_name + "_master_bug_shift.json")
    files = get_files(ast_diffs, _dir, folder_name)
    for entry in files:
        # skip entries already recorded in the list file
        if entry[0] in content:
            continue
        # append alternating source / destination lines
        with open(rsync_fname, "a") as f_io:
            f_io.write(entry[0] + "\n" + entry[1] + "\n")
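
# The list file alternates source and destination lines; a hedged sketch of
# reading the pairs back, e.g. to drive a later copy step (illustrative only):
with open(rsync_fname) as pairs_f:
    lines = [line.rstrip("\n") for line in pairs_f]
pairs = list(zip(lines[::2], lines[1::2]))  # [(source, destination), ...]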