Example #1
def main():

    # Parse command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_folder',
                        help="Data from the Platform",
                        required=True)
    parser.add_argument('--trained_model', help="Trained model", required=True)
    args = parser.parse_args()

    # Parameters
    data_folder = args.data_folder
    trained_model = args.trained_model

    # Configurations
    SETUP_PATH = 'configuration_test.yml'
    configurations = Configurations(SETUP_PATH)

    eprint(''.join("%s:\t%s\n" % item
                   for item in vars(configurations).items()))

    # Data
    get_data_test(data_folder, configurations, trained_model)

    # Inference
    # predictions = test_model(X_test, data_folder, trained_model, configurations)

    # for i in range(5):
    generate_visuals(data_folder,
                     os.path.join(configurations.output_folder, 'Prediction/'),
                     thold_area=0)
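
A minimal sketch of the script entry point this example assumes; the project helpers (eprint, Configurations, get_data_test, generate_visuals) live elsewhere in the repository, so the commented import below is a placeholder assumption rather than the real module path.

import argparse
import os

# Placeholder import for the project helpers used by main() (assumed module name):
# from project_utils import eprint, Configurations, get_data_test, generate_visuals

if __name__ == '__main__':
    main()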
Example #2
def _recursive_get_urls(crawled_urls,
                        test_page,
                        max_urls,
                        parent_url,
                        domain,
                        depth=0):
    if depth <= 0 or len(crawled_urls) >= max_urls:
        return crawled_urls
    asyncio.get_event_loop().run_until_complete(get_page(
        test_page, parent_url))

    html = test_page.source
    soup = BeautifulSoup(html, features='html.parser')

    urls = soup.findAll('a')
    for a in set(urls):
        url = a.get('href')
        if url is None:
            continue
        if url.startswith('/'):
            url = parent_url.rstrip('/') + url
        if urlparse(url).netloc == domain and url not in crawled_urls:
            if len(crawled_urls) < max_urls:
                crawled_urls.append(url)
                eprint('[LOG] Added: {}'.format(url))
                _recursive_get_urls(crawled_urls, test_page, max_urls, url,
                                    domain, depth - 1)
Example #3
def predict(image_file):
    model_path = os.path.join('inference/model_files', 'frednetv2.pth')
    if not os.path.exists(model_path):
        eprint("[ERR] Model file does not exist")
        exit(4)
    model = NNet()
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()

    with torch.no_grad():
        pilim = Image.open(image_file).convert('L').convert('RGB')
        pilim = preprocess_pilim(pilim)
        input_array = prepare_for_input(pilim, flip_lr=False)

        lr_input_array = prepare_for_input(pilim, flip_lr=True)
        try:
            out_array = get_output(model(get_tensor(input_array)))
        except Exception as e:
            eprint('[ERR] Inference failed: {}'.format(e))
            exit(2)

        lr_out_array = np.fliplr(get_output(model(get_tensor(lr_input_array))))

    out_array = (out_array + lr_out_array) / 2
    out_array = threshold_output(out_array, 0.5)
    out_array *= 255
    out_array = np.array(out_array, dtype='uint8')

    return out_array
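
A minimal usage sketch for predict(); the input path is hypothetical, and the returned mask is assumed to be a multi-channel uint8 array (it is consumed channel by channel in Example #8), so it is persisted with NumPy rather than as a single image.

# Hypothetical paths; predict() returns a uint8 mask array.
mask = predict('tmp/run_baseline/1.png')
np.save('tmp/run_baseline/1_mask.npy', mask)  # keep every mask channel for later scoring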
Example #4
def get_recursive_urls(parent_url, max_depth, max_urls):
    scraped_urls = [parent_url]
    domain = urlparse(parent_url).netloc
    page = MyPage()
    asyncio.get_event_loop().run_until_complete(get_page(page, parent_url))
    _recursive_get_urls(scraped_urls,
                        page,
                        max_urls,
                        parent_url,
                        domain,
                        depth=max_depth)
    eprint('[LOG] Finished crawling URLs for {}'.format(parent_url))
    return scraped_urls
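
A minimal crawl sketch using get_recursive_urls(); the URL and limits are illustrative only.

# Crawl up to 20 same-domain URLs, following links at most 2 levels deep.
urls = get_recursive_urls('https://example.com/', max_depth=2, max_urls=20)
for url in urls:
    eprint('[LOG] Crawled: {}'.format(url))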
Example #5
def get_data_test(data_folder, configurations, trained_model):

    # Parameters
    IMG_WIDTH = configurations.size_img
    IMG_HEIGHT = configurations.size_img
    IMG_CHANNELS = 3
    TEST_PATH = data_folder
    COUNT = configurations.sample_count

    # Path of Image Tiles and Masks
    path = os.path.join(TEST_PATH, "img")
    # path_mask = os.path.join(TEST_PATH, "mask")

    # total = int(sum([len(files) for r, d, files in os.walk(path)]))

    eprint(
        f'[DEBUG][get_data_test]  Getting and Resizing({IMG_WIDTH}x{IMG_HEIGHT}) Test Images and Masks... '
    )

    # Get and resize Test images and masks
    # test_cpt = int(sum([len(files) for r, d, files in os.walk(path)]))

    # X_test = np.ndarray((test_cpt, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.float32)
    # Y_test = np.ndarray((test_cpt, IMG_HEIGHT, IMG_WIDTH, 1), dtype=np.float32)  # dtype=np.bool)

    eprint(
        f'[DEBUG][get_data_test] Getting and Resizing Test Images and Masks Done!\nPath to img: {path}'
    )
    sys.stdout.flush()

    _, _, files_orj = next(os.walk(path))
    # _, _, files_mask = next(os.walk(path_mask))
    files_orj = sorted(files_orj)
    # files_mask = sorted(files_mask)

    eprint(f'[DEBUG][get_data_test] Number of Image Tiles: {len(files_orj)}')

    # for i, f in enumerate(files_orj[:COUNT]):
    #     img = cv2.imread(os.path.join(path, f))
    #     img = cv2.resize(img, (IMG_HEIGHT, IMG_WIDTH), interpolation=cv2.INTER_AREA)
    #     img = img / 255
    #     X_test[i] = img

    # for i, fm in enumerate(files_mask[:COUNT]):
    #     img_mask = cv2.imread(os.path.join(path_mask, fm), cv2.IMREAD_GRAYSCALE)
    #     img_mask = cv2.resize(img_mask, (IMG_HEIGHT, IMG_WIDTH), interpolation=cv2.INTER_AREA)
    #     img_mask = img_mask / 255
    #     img_mask = np.expand_dims(img_mask, axis=-1)
    #     Y_test[i] = img_mask
    # Load Trained Model
    model = load_model(trained_model,
                       custom_objects={
                           'dice_coef': dice_coef,
                           'dice_coef_loss': dice_coef_loss
                       })

    for i, f in enumerate(files_orj):
        X_test = np.ndarray((1, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS),
                            dtype=np.float32)

        img = cv2.imread(os.path.join(path, f))
        img = cv2.resize(img, (IMG_HEIGHT, IMG_WIDTH),
                         interpolation=cv2.INTER_AREA)
        img = img / 255
        X_test[0] = img
        # predictions = test_model(X_test, data_folder, trained_model, configurations)
        # Predict
        preds_test = model.predict(X_test)
        preds_reshaped = np.ndarray((1, IMG_HEIGHT, IMG_WIDTH),
                                    dtype=np.float32)
        preds_reshaped[0] = preds_test[0].reshape(IMG_HEIGHT, IMG_WIDTH)

        preds_upsampled = [
            np.expand_dims(cv2.resize(preds_reshaped[0],
                                      (IMG_HEIGHT, IMG_WIDTH)),
                           axis=-1)
        ]
        print("[INFO] Upsampling is done!(upsampled to ({}, {}) from ({}, {})".
              format(IMG_HEIGHT, IMG_WIDTH, preds_test[0].shape[0],
                     preds_test[0].shape[1]))

        output_pred = os.path.join(configurations.output_folder, 'Prediction')
        mkdir_if_not_exist(configurations.output_folder)
        mkdir_if_not_exist(output_pred)
        threshold_pred = 0.5

        img = preds_upsampled[0].copy()

        img_raw = img * 255
        out_name_raw = os.path.join(output_pred, "pred-raw-" + files_orj[i])
        cv2.imwrite(out_name_raw, img_raw)

        img[img > threshold_pred] = 1
        img[img <= threshold_pred] = 0
        img *= 255

        out_name = os.path.join(output_pred, "pred-" + files_orj[i])
        cv2.imwrite(out_name, img)

        print('[INFO] Finished Prediction!')
Example #6
def get_data(configurations, data_folder):

    # Write Directory
    dir_write = os.path.join(
        configurations.dir_write,
        'Run_Train_' + configurations.model_name + '_' + str(current_time))
    dir_pred = os.path.join(dir_write, 'Pred_imgs')
    dir_model = os.path.join(dir_write, 'Model')
    dir_log = os.path.join(dir_write, 'Log')

    if not os.path.exists(dir_write):
        os.makedirs(dir_write)
        os.makedirs(dir_pred)
        os.makedirs(dir_model)
        os.makedirs(dir_log)

    IMG_WIDTH = configurations.size_img
    IMG_HEIGHT = configurations.size_img
    IMG_CHANNELS = 3
    TRAIN_PATH = data_folder

    # Path of Image Tiles and Masks
    print(data_folder)
    path = os.path.join(TRAIN_PATH, "img")
    path_mask = os.path.join(TRAIN_PATH, "mask")
    path_bud_info = os.path.join(TRAIN_PATH, "Bud_Info")

    eprint(
        f'[DEBUG][get_data] Getting and Resizing({IMG_WIDTH}x{IMG_HEIGHT}) Train Images and Masks... '
    )

    # Get and resize train images and masks
    train_cpt = int(
        sum([len(files) for r, d, files in os.walk(path)]))

    eprint(
        f'[DEBUG][get_data] Getting and Resizing Train Images and Masks Done!\nPath to img: {path}'
    )
    sys.stdout.flush()

    _, _, files_orj = next(os.walk(path))
    _, _, files_mask = next(os.walk(path_mask))
    files_orj = sorted(files_orj)
    files_mask = sorted(files_mask)

    eprint(
        f'[DEBUG][get_data] Number of Image Tiles: {len(files_orj)}\t Number of Image Masks: {len(files_mask)}\n'
    )

    train_cpt_filtered = len(files_orj)
    files_orj_filtered = files_orj
    files_mask_filtered = files_mask

    if int(configurations.thold_tbud) > 0:
        train_cpt_filtered = 0
        files_orj_filtered = []
        files_mask_filtered = []

        for i, f in enumerate(files_orj):
            # Apply Bud Threshold
            if filter_tbud_count(path_bud_info, f,
                                 int(configurations.thold_tbud)):
                train_cpt_filtered += 1
                files_orj_filtered.append(files_orj[i])
                files_mask_filtered.append(files_mask[i])

    X_train = np.ndarray(
        (train_cpt_filtered, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS),
        dtype=np.float32)
    Y_train = np.ndarray((train_cpt_filtered, IMG_HEIGHT, IMG_WIDTH, 1),
                         dtype=np.float32)  # dtype=np.bool)

    for i, f in enumerate(files_orj_filtered):
        # # Apply Bud Threshold
        # if not filter_tbud_count(path_bud_info, f, configurations.thold_tbud):
        #     continue
        img = cv2.imread(os.path.join(path, f))
        img = cv2.resize(img, (IMG_HEIGHT, IMG_WIDTH),
                         interpolation=cv2.INTER_AREA)
        img = img / 255
        X_train[i] = img

    for i, fm in enumerate(files_mask_filtered):
        # # Apply Bud Threshold
        # if not filter_tbud_count(path_bud_info, fm, configurations.thold_tbud):
        #     continue
        img_mask = cv2.imread(os.path.join(path_mask, fm),
                              cv2.IMREAD_GRAYSCALE)
        img_mask = cv2.resize(img_mask, (IMG_HEIGHT, IMG_WIDTH),
                              interpolation=cv2.INTER_AREA)
        img_mask = img_mask / 255
        img_mask = np.expand_dims(img_mask, axis=-1)
        Y_train[i] = img_mask

    eprint(
        f'[DEBUG][get_data] After Filter thold_tbud:{configurations.thold_tbud} Number of Image Tiles: {len(X_train)}\t Number of Image Masks: {len(Y_train)}\n'
    )

    eprint(
        f"[DEBUG][INFO] Data Matrix: {round(X_train.nbytes / (1024 * 1000.0),3)} MB\n"
    )
    pixels = Y_train.flatten().reshape(train_cpt_filtered,
                                       IMG_HEIGHT * IMG_WIDTH)
    weights_train = pixels.copy()
    pixels = np.expand_dims(pixels, axis=-1)
    eprint(f"Data Read is Done!")

    return X_train, pixels
Example #7
def train_model(X, y, configurations):

    # Parameters - IMG
    IMG_HEIGHT = int(configurations.size_img)
    IMG_WIDTH = int(configurations.size_img)
    IMG_CHANNELS = 3

    # Parameters - Model
    lr_rate = float(configurations.learning_rate)
    model_name = str(configurations.model_name)
    model_type = str(configurations.model_type)
    dir_write = mkdir_if_not_exist(str(configurations.dir_write))
    activation = str(configurations.activation)
    batch_size = int(configurations.batch_size)
    epochs = int(configurations.epoch)
    dropout_ratio = float(configurations.dropout_ratio)
    dropout_level = int(configurations.dropout_level)
    model_string = str(configurations.model_string)
    eprint(f"[INFO][train_model] {model_string}")

    # Free up RAM in case the model definition cells were run multiple times
    K.clear_session()
    # Stop training when a monitoring quantity has stopped improving
    # earlystopper = EarlyStopping(monitor='val_loss', patience=100, verbose=1)

    # Initialize the model
    if model_type.lower() == 'resunet':
        model = unetModel_residual(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS,
                                   dropout_ratio=dropout_ratio, lr_rate=lr_rate,
                                   activation=activation, dropout_level=dropout_level)
    else:
        model = unetModel_basic_4(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS,
                                  dropout_ratio=dropout_ratio, lr_rate=lr_rate,
                                  activation=activation, dropout_level=dropout_level)

    # Save the best model (by validation loss) at the end of each epoch
    checkpointer = ModelCheckpoint(os.path.join(dir_write, model_string + '_main_modelCheckpoint.h5'),
                                   verbose=0, monitor='val_loss',
                                   save_best_only=True, save_weights_only=False,
                                   period=1, mode='auto')

    # Log training
    csv_logger = CSVLogger('{}/log_{}.training.csv'.format(
        dir_write, model_string))
    # Reduce lr_rate on plateau
    reduce_lr = ReduceLROnPlateau(monitor='val_dice_coef',
                                  factor=0.5,
                                  patience=10,
                                  verbose=0,
                                  mode='max',
                                  cooldown=1,
                                  min_lr=0.000001)
    # Early stopping with patience
    earlystopping = EarlyStopping(monitor='val_dice_coef',
                                  patience=25,
                                  mode='max')

    # Fit model
    eprint("[INFO][train_model] Model Fit...")
    results = model.fit(
        X,
        y,
        validation_split=0.2,
        batch_size=batch_size,
        epochs=epochs,
        callbacks=[checkpointer, csv_logger, reduce_lr, earlystopping],
        verbose=1,
        shuffle=True)  #, sample_weight=weights_train)
    eprint("[INFO][train_model] Model Fit Done!")

    # Write model history to the file
    pd.DataFrame(results.history).to_csv(
        os.path.join(dir_write, "history_" + model_string + ".csv"))

    return model, results
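
A minimal sketch wiring Example #6 and Example #7 together; configurations and data_folder are assumed to be set up as in Example #1.

# Load the training tiles/masks, then fit the U-Net variant chosen in the configuration.
X_train, pixels = get_data(configurations, data_folder)
model, results = train_model(X_train, pixels, configurations)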
Example #8
def work(baseline_dir, updated_dir, prefix):
    baseline_dir = os.path.join("./tmp", baseline_dir)
    updated_dir = os.path.join("./tmp", updated_dir)
    images = sorted([
        image for image in os.listdir(baseline_dir) if image.endswith('.png')
    ])
    scores_dict = {}

    for i, image in enumerate(images):
        mask_matches = []
        baseline_image_path = os.path.join(baseline_dir, image)
        updated_image_path = os.path.join(updated_dir, image)
        baseline_image = load_image_helper(baseline_image_path)
        updated_image = load_image_helper(updated_image_path)

        eprint('[LOG] Making prediction for baseline - {}, image - {}'.format(
            prefix, image))
        baseline_image_mask = predict(baseline_image_path)
        eprint('[LOG] Saving masks for baseline - {}'.format(prefix))
        save_masks(baseline_dir, image, baseline_image_mask, baseline_image)
        eprint('[LOG] Making prediction for updated - {}, image - {}'.format(
            prefix, image))
        updated_image_mask = predict(updated_image_path)
        eprint('[LOG] Saving masks for updated - {}'.format(prefix))
        save_masks(updated_dir, image, updated_image_mask, updated_image)
        eprint('[LOG] Finished predictions')

        if baseline_image.shape != updated_image.shape:
            eprint('[LOG] Images have different shapes. Using DP algo')
            for c in range(0, 5):
                mask_matches.append(
                    match_images(baseline_image_mask[:, :, c],
                                 updated_image_mask[:, :, c], STEP))

        eprint('[LOG] Calculating mask divergence score for {}, image - {}'.
               format(prefix, image))
        mask_divergence_scores = Scores.diff_mask_divergence(
            baseline_image_mask // 255, updated_image_mask // 255,
            mask_matches)

        eprint(
            '[LOG] Calculating pixelwise divergence score for {}, image - {}'.
            format(prefix, image))

        pixelwise_divergence_scores = Scores.diff_pixelwise_divergence(
            baseline_image, updated_image, baseline_image_mask // 255,
            updated_image_mask // 255, mask_matches)
        baseline_js_log_file = os.path.join(
            baseline_dir,
            image.split('.')[0] + "_js_log.json")
        updated_js_log_file = os.path.join(
            updated_dir,
            image.split('.')[0] + "_js_log.json")
        baseline_network_log_file = os.path.join(
            baseline_dir,
            image.split('.')[0] + "_network_log.json")
        updated_network_log_file = os.path.join(
            updated_dir,
            image.split('.')[0] + "_network_log.json")

        log_processor = LogProcessor(baseline_js_log_file, updated_js_log_file,
                                     baseline_network_log_file,
                                     updated_network_log_file)
        result = log_processor.run()

        ui_risk_score = max(mask_divergence_scores['overall'],
                            pixelwise_divergence_scores['overall'])

        scores_dict[i + 1] = {
            'ui_stats': {
                'mask_div': mask_divergence_scores,
                'pixelwise_div': pixelwise_divergence_scores,
                'risk_score': ui_risk_score
            },
            'js_stats': result['javascript'],
            'network_stats': result['network'],
            'risk_score': result['risk_score']
        }
    with open(os.path.join('./tmp', prefix + '_scores.json'), 'w') as f:
        json.dump(scores_dict, f, indent=2)
    eprint('[LOG] Saved scores dictionary for {}'.format(prefix))
Example #9
def work(baseline_url, updated_url, max_depth, max_urls, prefix,
         auth_baseline_username, auth_baseline_password, auth_updated_username,
         auth_updated_password):
    baseline_url = add_auth(url=baseline_url,
                            username=auth_baseline_username,
                            password=auth_baseline_password)
    updated_url = add_auth(url=updated_url,
                           username=auth_updated_username,
                           password=auth_updated_password)
    crawled_baseline = get_recursive_urls(baseline_url, max_depth,
                                          max_urls)[:max_urls]
    crawled_upgraded = get_recursive_urls(updated_url, max_depth,
                                          max_urls)[:max_urls]

    baseline_domain = get_domain(baseline_url)
    updated_domain = get_domain(updated_url)

    crawled_baseline_paths = [get_path(path) for path in crawled_baseline]
    crawled_updated_paths = [get_path(path) for path in crawled_upgraded]

    all_paths = list(set(crawled_baseline_paths) | set(crawled_updated_paths))
    ss_report = {}

    for i, path in enumerate(all_paths):
        eprint('[LOG] Taking screenshots for {} - {}'.format(prefix, path))
        collect_data(baseline_domain + path, prefix + '_baseline',
                     '{}.png'.format(i + 1))
        collect_data(updated_domain + path, prefix + '_updated',
                     '{}.png'.format(i + 1))
        ss_report[i + 1] = {
            'baseline': baseline_domain + path,
            'updated': updated_domain + path,
            'endpoint': path,
            'baseline_assets': 'tmp/' + prefix + "_baseline/",
            'updated_assets': 'tmp/' + prefix + "_updated/"
        }
    eprint('[LOG] Finished taking screenshots for {}'.format(prefix))
    with open(os.path.join('./tmp', prefix + '_ss_report.json'), 'w') as f:
        json.dump(ss_report, f, indent=2)

    p = Popen([
        'python3', 'worker_predict.py', '--baseline-dir', prefix + '_baseline',
        '--updated-dir', prefix + '_updated', '--prefix', prefix
    ])
    if p.poll() is not None and p.poll() > 0:
        eprint('[ERR] Failed to launch inference process')
        exit(3)
    eprint('[LOG] Waiting for {}'.format(prefix))
    p.wait()
    if p.poll() != 0:
        eprint('[ERR] Prediction script failed for {}'.format(prefix))
        exit(p.poll())
    eprint('[LOG] Finished prediction for {}'.format(prefix))

    ui_risk_scores = []
    network_risk_scores = []
    js_stats_total = []
    net_stats_total = []
    pixelwise_div_total = []
    mask_div_total = []
    with open(os.path.join('./tmp', prefix + '_report.json'), 'w') as f:
        scores_report = json.load(
            open(os.path.join('./tmp', prefix + '_scores.json')))
        screenshots_report = json.load(
            open(os.path.join('./tmp', prefix + '_ss_report.json')))
        page_report = {}
        for i in range(1, len(all_paths) + 1):
            page_report[i] = scores_report[str(i)]
            js_stats_total.append(scores_report[str(i)]["js_stats"])
            net_stats_total.append(scores_report[str(i)]["network_stats"])
            page_report[i]['links'] = screenshots_report[str(i)]
            ui_risk_scores.append(page_report[i]["ui_stats"]["risk_score"])
            network_risk_scores.append(page_report[i]["risk_score"])
            pixelwise_div_total.append(
                page_report[i]['ui_stats']['pixelwise_div'])
            mask_div_total.append(page_report[i]['ui_stats']['mask_div'])
        page_report['risk_score'] = max(max(ui_risk_scores),
                                        max(network_risk_scores))

        page_report['js_stats'] = dsum(js_stats_total)
        page_report['ui_stats'] = {
            'pixelwise_div_mean': dsum(pixelwise_div_total, True),
            'mask_div_mean': dsum(mask_div_total, True),
            'pixelwise_div_std': dstd(pixelwise_div_total),
            'mask_div_std': dstd(mask_div_total)
        }
        page_report['network_stats'] = dsum(net_stats_total)
        json.dump(page_report, f, indent=4)
        eprint('[LOG] Saved {} report to {}'.format(prefix,
                                                    prefix + '_report.json'))
        os.remove(os.path.join('./tmp', prefix + '_scores.json'))
        os.remove(os.path.join('./tmp', prefix + '_ss_report.json'))

    exit(0)
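
A minimal sketch of the worker_predict.py entry point that the Popen call above expects; only the flag names come from that call, while the argument wiring to the work() function from Example #8 is an assumption.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--baseline-dir', required=True)
    parser.add_argument('--updated-dir', required=True)
    parser.add_argument('--prefix', required=True)
    args = parser.parse_args()
    # Dispatch to the work(baseline_dir, updated_dir, prefix) function defined in Example #8.
    work(args.baseline_dir, args.updated_dir, args.prefix)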