def serial_gradcam(model,smpl_census_imnames,indices,ind_batched_list,class_):
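    # Assumed globals (defined elsewhere in the original script): conv_name
    # (target conv layer name), W, H (raster size), GuidedBackprop,
    # grad_cam_batch, and load_prepared_img.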
    # CAM
    full_grad_cams, full_grad_cam_bgs = [], []
    # Guided BackPropation
    print("Guided BackPropagation")
    guided_bprop = GuidedBackprop(model, output_index=class_)
    print("Batching")
    for i in tqdmn(range(len(ind_batched_list)-1)):
        batch_smpl_census_imnames = smpl_census_imnames[ind_batched_list[i]:ind_batched_list[i+1]]
        batch_sample_census_cell_imgs = [load_prepared_img(im)
                                         for im in batch_smpl_census_imnames]
        batch_classes = [class_
                         for j in range(ind_batched_list[i],ind_batched_list[i+1])]
        grad_cams, grad_cam_rzs = grad_cam_batch(model,
                                                 np.stack(batch_sample_census_cell_imgs),
                                                 batch_classes, conv_name)
        masks = [guided_bprop.get_mask(img)
                 for img in tqdmn(batch_sample_census_cell_imgs)]
        images = np.stack([np.sum(np.abs(mask), axis=2) for mask in tqdmn(masks)])

        # Combination
        gradcam_bgs = np.multiply(grad_cam_rzs,images)
        upper_percs = np.percentile(gradcam_bgs,99,(1,2))
        gradcam_bgs = np.minimum(gradcam_bgs,np.stack([k *np.ones((W,H))
                                                       for k in upper_percs]))
        full_grad_cams.append(grad_cams)
        full_grad_cam_bgs.append(gradcam_bgs)
    #
    full_grad_cams = np.vstack(full_grad_cams) if len(full_grad_cams) > 1 else full_grad_cams
    full_grad_cam_bgs = np.vstack(full_grad_cam_bgs) if len(full_grad_cam_bgs) > 1 else full_grad_cam_bgs
    return full_grad_cams, full_grad_cam_bgs
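serial_gradcam consumes ind_batched_list as a list of batch boundaries. A minimal sketch of that construction (mirroring serialize_batch below; MAX_BS is the assumed max batch size from the original config):

import numpy as np

MAX_BS = 4                                     # assumed max batch size
n_imgs = 10                                    # stand-in for a batch's length
ind_batched_list = list(np.arange(0, n_imgs, MAX_BS))
if ind_batched_list[-1] != n_imgs:
    ind_batched_list.append(n_imgs)
# ind_batched_list == [0, 4, 8, 10]; consecutive pairs delimit each batch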
def matrix_factorization(X, P, Q, K, steps, alpha, beta):
    nonnull = np.where(~np.isnan(X))

    Q = Q.T
    for step in tqdmn(range(steps)):
        for idx in tqdmn(range(nonnull[0].size), leave=False):
            i, j = nonnull[0][idx], nonnull[1][idx]

            # calculate the reconstruction error of the element
            eij = X[i][j] - np.dot(P[i, :], Q[:, j])

            # Frobenius norms of P and Q for regularization
            sum_of_norms = LA.norm(P) + LA.norm(Q)
            eij += ((beta / 2.0) * sum_of_norms)

            # gradient step on P and Q, with L2 weight decay
            P[i, :] += alpha * (2 * eij * Q[:, j] - (beta * P[i, :]))
            Q[:, j] += alpha * (2 * eij * P[i, :] - (beta * Q[:, j]))

        V = P.dot(Q)
        error = np.sum(np.power(X[nonnull] - V[nonnull], 2))

        if error < 0.001:
            break

    return P, Q.T
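A toy usage sketch for matrix_factorization, assuming numpy is imported as np, numpy.linalg as LA, and tqdmn aliases tqdm (none of these imports are shown in the snippet above):

import numpy as np
from numpy import linalg as LA
from tqdm import tqdm as tqdmn  # assumption: tqdmn is a tqdm alias

X = np.array([[5.0, 3.0, np.nan],
              [4.0, np.nan, 1.0],
              [1.0, 1.0, np.nan],
              [np.nan, 1.0, 5.0]])
K = 2
P = np.random.rand(X.shape[0], K)
Q = np.random.rand(X.shape[1], K)
P, Q = matrix_factorization(X, P, Q, K, steps=2000, alpha=0.0002, beta=0.02)
X_hat = P.dot(Q.T)  # dense reconstruction; NaN cells become predictions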
Example #3
def smiles2mol(
    df: pd.DataFrame,
    smiles_column_name,
    mols_column_name,
    drop_nulls: bool = True,
    progressbar: Union[None, str] = None,
) -> pd.DataFrame:
    """
    Convert a column of SMILES strings into RDKit Mol objects.

    Invalid SMILES (as determined by RDKit) produce null mols, which are
    dropped when ``drop_nulls`` is True.

    Method chaining usage:

    .. code-block:: python

        df = (
            pd.DataFrame(...)
            .smiles2mol(smiles_column_name='smiles', mols_column_name='mols')
        )

    A progressbar can be optionally used.

    - Pass in "notebook" to show a tqdm notebook progressbar. (ipywidgets must
      be enabled with your Jupyter installation.)
    - Pass in "terminal" to show a tqdm progressbar. Better suited for use
      with scripts.
    - "none" is the default value - progress bar will be not be shown.

    :param df: pandas DataFrame.
    :param smiles_column_name: Name of column that holds the SMILES strings.
    :param mols_column_name: Name to be given to the new mols column.
    :param drop_nulls: Whether to drop rows whose mols failed to be
        constructed.
    :param progressbar: Whether to show a progressbar or not.
    """
    valid_progress = ["notebook", "terminal", None]
    if progressbar not in valid_progress:
        raise ValueError(f"progressbar kwarg must be one of {valid_progress}")

    if progressbar is None:
        df[mols_column_name] = df[smiles_column_name].apply(Chem.MolFromSmiles)
    else:
        if progressbar == "notebook":
            tqdmn().pandas(desc="mols")
        elif progressbar == "terminal":
            tqdm.pandas(desc="mols")
        df[mols_column_name] = df[smiles_column_name].progress_apply(
            Chem.MolFromSmiles)

    if drop_nulls:
        df.dropna(subset=[mols_column_name], inplace=True)
    df.reset_index(inplace=True, drop=True)
    return df
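A quick usage sketch for the function above (assumes pandas and rdkit are installed; the invalid row is illustrative):

import pandas as pd
from rdkit import Chem

df = pd.DataFrame({"smiles": ["CCO", "c1ccccc1", "not_a_smiles"]})
df = smiles2mol(df, smiles_column_name="smiles", mols_column_name="mols")
# drop_nulls=True (the default) removes the unparsable row, leaving
# RDKit Mol objects for ethanol and benzene.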
def parallel_make_dataset(im_data, CPU_USE, null_thresh=1):
    # Extract all images in chunks distributed according to CPU_USE

    if CPU_USE > 1:
        pre_full = Parallel(n_jobs=CPU_USE)(
            delayed(parallel_folder_extraction)(im_arr,
                                                null_thresh=null_thresh)
            for im_arr in tqdmn(chunks(im_data, CPU_USE)))
    else:
        pre_full = [
            parallel_folder_extraction(im_arr, null_thresh=null_thresh)
            for im_arr in tqdmn(chunks(im_data, CPU_USE))
        ]

    return [data for pre in pre_full for data in pre]
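The chunks helper is not defined in these snippets; a minimal sketch consistent with how parallel_make_dataset splits the work into CPU_USE roughly equal parts (an assumption about the original helper):

def chunks(data, n):
    # split `data` into n contiguous, roughly equal-sized parts
    k, m = divmod(len(data), n)
    return [data[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
            for i in range(n)]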
Example #5
def smiles2mol(
    df: pd.DataFrame,
    smiles_col: str,
    mols_col: str,
    drop_nulls: bool = True,
    progressbar: Union[None, str] = None,
) -> pd.DataFrame:
    """
    Convert a column of SMILES strings into RDKit Mol objects.

    Invalid SMILES (as determined by RDKit) produce null mols, which are
    dropped when ``drop_nulls`` is True.

    Method chaining usage:

    .. code-block:: python

        df = (
            pd.DataFrame(...)
            .smiles2mol(smiles_col='smiles', mols_col='mols')
        )

    :param df: pandas DataFrame.
    :param smiles_col: Name of column that holds the SMILES strings.
    :param mols_col: Name to be given to the new mols column.
    :param drop_nulls: Whether to drop rows whose mols failed to be
        constructed.
    :param progressbar: Whether to show a progressbar or not; one of
        "notebook", "terminal", or None.
    """
    valid_progress = ["notebook", "terminal", None]
    if progressbar not in valid_progress:
        raise ValueError(f"progressbar kwarg must be one of {valid_progress}")

    if progressbar is None:
        df[mols_col] = df[smiles_col].apply(Chem.MolFromSmiles)
    else:
        if progressbar == "notebook":
            tqdmn().pandas(desc="mols")
        elif progressbar == "terminal":
            tqdm.pandas(desc="mols")
        df[mols_col] = df[smiles_col].progress_apply(Chem.MolFromSmiles)

    if drop_nulls:
        df.dropna(subset=[mols_col], inplace=True)
    df.reset_index(inplace=True, drop=True)
    return df
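Both smiles2mol variants, and the other functions on this page, rely on tqdm aliases that are never imported in these snippets; a likely set of imports (an assumption, since the notebook variant's import path varies by tqdm version):

from tqdm import tqdm                      # terminal progress bars
from tqdm.notebook import tqdm as tqdmn    # notebook progress bars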
def serialize_batch(model,ua_data,gdf_full_im_df,indices,ind_list,
                    class_poor,class_rich,ideal_workload):
    print("Overlaying")
    test_cores = [gpd.overlay(ua_data.iloc[indices[ind]],
                              gdf_full_im_df.iloc[ind:(ind+1)],
                              how='intersection') for ind in tqdmn(ind_list)]
    print("Bounding")
    ts = [from_bounds(gdf_full_im_df[ind:(ind+1)].bounds.minx.values[0],
                      gdf_full_im_df[ind:(ind+1)].bounds.miny.values[0],
                      gdf_full_im_df[ind:(ind+1)].bounds.maxx.values[0],
                      gdf_full_im_df[ind:(ind+1)].bounds.maxy.values[0],
                      W, H) for ind in tqdmn(ind_list)]
    print("Generating Images")
    sample_datas = [gdf_full_im_df.iloc[ind] for ind in tqdmn(ind_list)]
    smpl_census_imnames = [IMG_OUTPUT_DIR + val.path2im for val in tqdmn(sample_datas)]
    #
    data = []
    for batch_idx in range(0,len(ind_list),ideal_workload):
        batch_ind_list = ind_list[batch_idx:(batch_idx+ideal_workload)]
        batch_smpl_census_imnames = smpl_census_imnames[batch_idx:(batch_idx+ideal_workload)]
        batch_indices = indices[batch_idx:(batch_idx+ideal_workload)]
        ind_batched_list = list(np.arange(0,len(batch_ind_list),MAX_BS))
        if ind_batched_list[-1] != len(batch_ind_list):
            ind_batched_list.append(len(batch_ind_list))
        #
        print("GradCaming POOR")
        gcams_poor, gbgs_poor = serial_gradcam(model,batch_smpl_census_imnames,
                                               batch_indices,ind_batched_list,
                                               class_poor)
        print("GradCaming RICH")
        gcams_rich, gbgs_rich = serial_gradcam(model,batch_smpl_census_imnames,
                                               batch_indices,ind_batched_list,
                                               class_rich)
        print("Computing raster statistics")
        for j,ind in tqdmn(enumerate(batch_ind_list)):
            val_idINSPIRE = gdf_full_im_df.iloc[ind:(ind+1)].idINSPIRE.values[0]
            data.append(
                compute_statistics(gbgs_poor[j],gbgs_rich[j],
                                   gcams_poor[j],gcams_rich[j],
                                   ts[ind],test_cores[ind],
                                   val_idINSPIRE,class_poor,class_rich))
    return data
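from_bounds above is most likely rasterio.transform.from_bounds, whose argument order (west, south, east, north, width, height) matches the call in serialize_batch; it returns the affine transform mapping pixel coordinates of a W x H raster onto the tile's bounding box. A standalone sketch with illustrative coordinates:

from rasterio.transform import from_bounds

transform = from_bounds(0.0, 0.0, 1000.0, 1000.0, 256, 256)
# transform * (col, row) converts pixel indices to map coordinates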

if __name__ == '__main__':
    print("Generating Full DataSet")
    full_im_df = generate_full_idINSPIRE(UA_DIR, AERIAL_DIR, CENSUS_DIR, IMG_OUTPUT_DIR)
    city_assoc = pd.read_csv(IMG_OUTPUT_DIR + "city_assoc.csv")
    full_im_df_ua = pd.merge(full_im_df,city_assoc,on="idINSPIRE")
    full_im_df_ua = full_im_df_ua[full_im_df_ua.FUA_NAME == city]
    #
    gdf_full_im_df = full_im_df_ua.to_crs("EPSG:3035")
    #
    print("Generating UA DataSet")
    ua_data = gpd.GeoDataFrame(pd.concat([gpd.read_file(d)
                    for d in tqdmn(glob.glob(UA_DIR+"**/Shapefiles/*UA2012.shp"))]))
    ua_data.crs = "EPSG:3035"
    #
    print("Joining UA + Full")
    indices = sjoin(ua_data,gdf_full_im_df)
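    # `sjoin` is assumed to be a wrapper around the geopandas spatial join
    # that returns, per image tile, the indices of intersecting UA polygons
    # (consumed as indices[ind] inside serialize_batch above).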
    #
    print("Loading Model")
    #indices to distribute among cores
    folds_data = pd.concat(
        [pd.read_csv(fold_file,header=0,sep=",")
         for fold_file in glob.glob(MODEL_OUTPUT_DIR+"/*last_best_models.csv")],
        axis=0).reset_index(drop=True)
    best_model_city = folds_data.loc[
        folds_data["Validation loss"].idxmin()]["Model file"]
    print("Loading Weights {}".format(best_model_city))
    #
    eff_model = load_model(MODEL_OUTPUT_DIR + best_model_city,