Example #1
def walk_dict(parent, jdict, size_dict=None, method_dict=None, root_name=None):
    # d is the entry name; x is either a nested dict (a sub-folder) or a leaf value.
    for d, x in jdict.items():
        path = os.path.join(parent, d)
        size = utils.get_path_size(path)
        key = d
        if root_name is not None:
            key = utils.get_folder_name(path, root_name)
        # print("key:" + key)
        if size_dict is not None and isinstance(size_dict, dict):
            size_dict[key] = size

        count = None
        if method_dict is not None:
            count = get_method_counts_in_file(path)
            if count is not None:
                # print("d:" + d + " count:" + count)
                method_dict[key] = count
        method_count = "method:"
        if count is None:
            method_count = ""
        else:
            method_count += str(count)
        print("path:%-30s | size: %-12s | %-17s" % (
            key, utils.get_size_in_nice_string(size), method_count))
        if isinstance(x, dict):
            walk_dict(path, x, size_dict, method_dict, root_name)
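A hedged usage sketch for walk_dict, assuming a nested dict that mirrors a directory tree and the utils helpers from the snippet above (the tree and root path below are made up for illustration):

# Illustrative only: the directory layout and root path are assumptions.
sizes, methods = {}, {}
tree = {'src': {'helpers': {}}, 'docs': {}}
walk_dict('/path/to/project', tree, size_dict=sizes, method_dict=methods,
          root_name='project')
# sizes and methods now map each folder key to its size / method count.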
Example #2
def check_apk_alpha(apk, apk_dir, ignore9, value=255):
    # Walk the directory tree to be scanned.
    count = 0
    # os.walk yields three values per directory: the parent path, the sub-directory names (without paths), and the file names.
    for parent, dir_names, filenames in os.walk(apk_dir):
        for filename in filenames:
            # Build the file's full path
            path = os.path.join(parent, filename)

            # Filter by file extension
            if not os.path.splitext(filename)[1] in ALPHA_IMAGE_FORMAT:
                continue

            # Skip empty file names
            if not filename:
                continue
            # Skip .9 (nine-patch) images
            if ignore9 and ".9" in filename:
                continue

            # Check the image mode
            mode = check_img_mode(path, value)
            if mode == 'RGB':
                image_path = utils.get_folder_name(parent, apk_dir) + os.sep + filename
                print('IMAGE:' + image_path)
                count += 1

    if count > 0:
        print('These %d image(s) may be PNGs without an alpha channel; consider converting them to JPEG.' % count)
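check_img_mode is not defined in this listing; a minimal sketch of what such a helper might look like, assuming Pillow is available and that returning 'RGB' means the image has no usable alpha channel (only the name and the value parameter come from the snippet above, the body is an assumption):

from PIL import Image

def check_img_mode(path, value=255):
    # Hypothetical sketch: report 'RGB' when the image carries no usable alpha,
    # i.e. it is plain RGB, or every alpha pixel is >= `value` (fully opaque).
    img = Image.open(path)
    if img.mode == 'RGB':
        return 'RGB'
    if img.mode == 'RGBA':
        low, high = img.getchannel('A').getextrema()
        if low >= value:
            return 'RGB'
    return img.mode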
Example #3
def randomize_file_names():
    folder_name = utils.get_folder_name()
    for file in os.listdir(folder_name):
        path_to_file = os.path.join(folder_name, file)
        if os.path.isfile(path_to_file):
            # Draw a fresh random name per file so successive renames don't overwrite each other.
            random_name = utils.get_random_name()
            file_ext = os.path.splitext(file)[1]
            os.rename(path_to_file, os.path.join(folder_name, f"{random_name}{file_ext}"))
Example #4
def check_image_limit(apk, apk_dir, limit=40000):
    count = 0
    for parent, dir_names, filenames in os.walk(apk_dir):
        for filename in filenames:
            # Build the file's full path
            path = os.path.join(parent, filename)

            # Filter by file extension
            if not os.path.splitext(filename)[1] in LIMIT_IMAGE_FORMAT:
                continue

            # Skip empty file names
            if not filename:
                continue

            file_size = utils.get_path_size(path)
            if int(file_size) > int(limit):
                image_path = utils.get_folder_name(parent, apk_dir) + os.sep + filename
                print('IMAGE:%s  size:%s' % (image_path, utils.get_size_in_nice_string(file_size)))
                count += 1
    if count > 0:
        print("These files may be too large.(larger than %s)" % utils.get_size_in_nice_string(int(limit)))
Example #5
def use_clearml(taskid=None):
    """
    does setup for clearml connection.
    Args:
        taskid: id of experiment to be reused. default is a new experiment.

    Returns: clearml_logger, task object.

    """
    if cfg['checkpoint']['use_saved']:
        cfg['checkpoint']['saved_path'] = cfg['checkpoint']['run_name']
        task = Task.init(continue_last_task=True, reuse_last_task_id=taskid)
        task.set_initial_iteration(0)
        # task = Task.get_task(task_id='4f8b87a1e1684be9a8e34ede211d3233')
        # project_name='ariel-mde', task_name=get_folder_name())
    else:
        task = Task.init(project_name='ariel-mde', task_name=get_folder_name())
        config_file = task.connect_configuration(Path('configs.yml'),
                                                 'experiment_config')
        task_cfg = task.connect(
            cfg)  # enabling configuration override by clearml
        set_cfg(task_cfg)
    clearml_logger = task.get_logger()
    return clearml_logger, task
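A hedged usage sketch, assuming the cfg dict and the ClearML Task import from the snippet are already in scope:

# Illustrative only: start (or resume) an experiment, then report a scalar.
clearml_logger, task = use_clearml()
clearml_logger.report_scalar(title='loss', series='train', value=0.1, iteration=0)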
Example #6
code_declaration = data[0]
chapter_label = data[1]
heading_label = data[2]
sub_heading_label = data[3]
country_extension_label = data[4]

n_chapter_classes = chapter_label.shape[1]
n_heading_classes = heading_label.shape[1]
n_sub_heading_classes = sub_heading_label.shape[1]
n_country_extension_classes = country_extension_label.shape[1]



model = HierarchicalModel(n_chapter_classes, n_heading_classes, n_sub_heading_classes, n_country_extension_classes)

weight_folder_dir = get_folder_name('model_weights/model_{}')
os.mkdir(weight_folder_dir)
weight_path = os.path.join(weight_folder_dir, "model_weight.ckpt")
model_parameters = model.get_parameters()
with open(os.path.join(weight_folder_dir, 'model_parameters.json'), 'w') as outfile:
    json.dump(model_parameters, outfile)
    
checkpoint = ModelCheckpoint(weight_path, save_weights_only=True, monitor='loss', verbose=2, save_best_only=True, mode='min')

optimizer = Adam(args.learning_rate)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
early_stop = EarlyStopping(monitor='val_loss', mode='min', patience=8, verbose=2, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=2, min_lr=1e-5)

print("Training model with batch size {} and {} epochs".format(args.batch, args.epochs))
Example #7
def train():
    """
    main train loop. all configurations are taken from the configs.yml file.
    Returns:
        None, saves checkpoints of net as it trains into file (+ is saved by clearml).
    """
    logger.info('getting params, dataloaders, etc...')
    cfg_train = cfg['train']
    cfg_checkpoint = cfg['checkpoint']
    cfg_validation = cfg['validation']
    epochs = cfg_train['epochs']
    print_every = cfg_train['print_every']
    save_every = cfg_checkpoint['save_every']
    folder_name = get_folder_name()
    writer = SummaryWriter(os.path.join('runs', folder_name))
    loaders = get_loaders()
    train_loader, val_loader = None, None
    len_loaders = len(loaders)
    if len_loaders == 4:
        train_loader, val_loader, test_loader, depth_postprocessing = loaders
    elif len_loaders == 3:
        train_loader, val_loader, depth_postprocessing = loaders
    elif len_loaders == 2:
        train_loader, val_loader = loaders
        depth_postprocessing = None
    elif len_loaders == 1:
        train_loader = loaders[0]
        depth_postprocessing = None
    assert train_loader is not None and (
        val_loader is not None
        or not cfg_validation['val_round']), "problem with loader."
    n_batches = len(train_loader)
    cfg_model = cfg['model']
    cfg_checkpoint = cfg['checkpoint']
    cfg_optim = cfg['optim']
    if cfg_checkpoint['use_saved']:
        net, optimizer, epoch_start, running_loss = load_checkpoint()
        criterion = get_loss_function()
        epoch_start = epoch_start + 1  # since we stopped at the last epoch, continue from the next.
    else:
        criterion, net, optimizer = get_net()
        running_loss = 0.0
        epoch_start = 0
    if cfg_optim['use_lr_scheduler']:
        old_lr = optimizer.param_groups[0]['lr']
        scheduler = ReduceLROnPlateau(optimizer, mode='min')
    logger.info('got all params, starting train loop')
    for epoch in range(epoch_start,
                       epochs):  # loop over the dataset multiple times
        net.train()
        with tqdm(total=n_batches,
                  desc=f'Epoch {epoch}/{epochs}',
                  unit='batch') as pbar:
            for data in train_loader:
                # get the inputs; data is a dict with 'image' and 'depth' entries (and optionally 'mask')
                img, gt_depth = data['image'], data['depth']
                if cfg['dataset']['use_mask'] and not cfg['dataset'][
                        'add_mask_to_image']:
                    assert 'mask' in data, 'no mask but required mask'
                    mask = data['mask']
                else:
                    mask = None
                loss, pred_depth = step(criterion, img, gt_depth, net,
                                        optimizer, mask)
                loss_value = loss.item()
                # NaN is the only value not equal to itself, so this catches a NaN loss.
                assert loss_value == loss_value, 'loss is NaN'
                pbar.set_postfix(**{'loss (batch)': loss_value})
                running_loss += loss_value
                pbar.update()

            if cfg_optim['use_lr_scheduler']:
                val_score, val_sample = eval_net(net, val_loader)
                scheduler.step(val_score)  # possibly plateau LR.
                new_lr = optimizer.param_groups[0]['lr']
                if old_lr != new_lr:
                    print(f'old lr: {old_lr}, new lr: {new_lr}')
                old_lr = new_lr
            if epoch % print_every == print_every - 1:
                if not cfg_optim['use_lr_scheduler']:
                    if cfg_validation['val_round']:
                        assert cfg_validation[
                            'val_percent'] is not None, 'required val_round but didn\'t give a split size'
                        val_score, val_sample = eval_net(net, val_loader)
                    else:
                        val_score = None
                        val_sample = None
                train_loss = running_loss / (print_every * n_batches)
                # TODO: see how to save the original image for printing without doing it for every batch.
                train_sample = {**data, 'pred': pred_depth}
                if cfg['validation']['hist']:
                    viz_net = net
                else:
                    viz_net = None
                if depth_postprocessing:
                    logger.info('post-processing prediction and depth.')
                    train_sample = depth_postprocessing(train_sample)
                    if cfg_validation['val_round']:
                        val_sample = depth_postprocessing(val_sample)
                print_stats(train_sample, val_sample, train_loss, val_score,
                            epoch, writer, viz_net)
                running_loss = 0.0
            if save_every is not None and (epoch % save_every
                                           == save_every - 1):
                save_checkpoint(epoch, net, optimizer, running_loss)
    print('Finished Training')
    writer.close()
    # TODO: graceful death - checkpoint when exiting run as well.
    if save_every is not None:
        save_checkpoint(epochs - 1, net, optimizer, 0)
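step() is not shown in this listing; a minimal sketch of what a helper with that signature might do, assuming a standard PyTorch forward/backward pass (the masking behaviour is a guess):

def step(criterion, img, gt_depth, net, optimizer, mask=None):
    # Hypothetical sketch of a single optimisation step.
    optimizer.zero_grad()
    pred_depth = net(img)
    if mask is not None:
        # Assumed: restrict the loss to the valid pixels marked by the mask.
        loss = criterion(pred_depth * mask, gt_depth * mask)
    else:
        loss = criterion(pred_depth, gt_depth)
    loss.backward()
    optimizer.step()
    return loss, pred_depth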
Example #8
            num_ast[num_diffs] += 1
    
    stat["num_files"] = num_files
    stat["num_push"] = num_push
    stat["num_ast"] = num_ast

    return stat

master_dict = {}
master_results = []

pool = mp.Pool(30)

_dirs = list(utils.data_itr())

ast_diffs = [os.path.join(_dir[4], utils.get_folder_name(_dir) + "_master_bug_metadata.json") for _dir in _dirs]

pool.map_async(get_num_downloaded, _dirs, callback=collect_result)

pool.close()
pool.join()

stats = [{"num_downloaded": num_downloaded[i], "dir": _dirs[i][4], "fname": ast_diffs[i]} for i in range(0, len(num_downloaded))]

pool = mp.Pool(30)
results = pool.map(get_stats, stats)
master_results += results

pool.close()
pool.join()
Example #9
            files.append((os.path.join(path, _id, diff), prefix + diff))

            js_file = utils.get_source(path, _id, f["buggy_file"])

            files.append((js_file, folder_name + "_" + _id + "_" +
                          js_file.replace(path + "/" + _id + "/", "")))

    return files


if not os.path.exists(rsync_fname):
    open(rsync_fname, "w").close()

with open(rsync_fname, "r") as f:
    content = f.read()

for _dir_tup in tqdm(utils.data_itr()):

    folder_name = utils.get_folder_name(_dir_tup)

    _dir = _dir_tup[4]
    ast_diffs = os.path.join(_dir, folder_name + "_master_bug_shift.json")

    files = get_files(ast_diffs, _dir, folder_name)

    for f in files:
        if f[0] in content: continue

        with open(rsync_fname, "a") as f_io:
            f_io.write(f[0] + "\n" + f[1] + "\n")