예제 #1
0
def extract_stats_from_plate(csv, dest):
    """Write descriptive statistics of a plate's 'mock' rows and return them.

    The plate is loaded with ``load_plate_csv``. The columns that
    ``list_columns`` returns as its second result are dropped in place, and
    only rows whose index level 1 equals ``'mock'`` are kept. Output files go
    to ``<dest>/<plate>/``, where ``<plate>`` is the text of *csv* before its
    first dot:

    * ``CW_<plate>_Summery.csv`` -- ``describe()`` of the rows grouped by
      ``'Plate'`` and ``'Image_Metadata_Well'``;
    * ``CW_<plate>_<stat>.csv`` -- one file per statistic, holding
      ``describe()`` of that statistic across the groups above.

    Args:
        csv: Plate CSV name as a string; it is both passed to
            ``load_plate_csv`` and used to derive the output folder name.
        dest: Directory under which the plate folder is created.

    Returns:
        A 3-tuple ``(desc, per_stat, desc_wells)`` where ``desc`` is
        ``describe()`` of the mock rows grouped by ``'Plate'``, ``per_stat``
        maps each statistic name to the matching cross-section of ``desc``,
        and ``desc_wells`` is the per-well description that was written to
        the summary file.
    """
    print(f'Processing: {csv}')

    plate_id = csv.split(".", 1)[0]
    out_dir = path.join(dest, plate_id)
    makedirs(out_dir, exist_ok=True)

    cells = load_plate_csv(csv)

    # Only the second result of list_columns is needed here.
    _, dropped_cols, _ = list_columns(cells)
    cells.drop(dropped_cols, axis=1, inplace=True)

    mock_mask = cells.index.isin(['mock'], level=1)
    mock_cells = cells[mock_mask]

    # Per-well description, plus one file per statistic summarising it.
    per_well = mock_cells.groupby(['Plate', 'Image_Metadata_Well']).describe()
    summary_file = path.join(out_dir, f'CW_{plate_id}_Summery.csv')
    per_well.to_csv(summary_file)

    for stat in per_well.columns.unique(level=1):
        stat_over_wells = per_well.xs(stat, level=1, axis=1)
        stat_file = path.join(out_dir, f'CW_{plate_id}_{stat}.csv')
        stat_over_wells.describe().to_csv(stat_file)

    # Plate-level description, split into one frame per statistic.
    per_plate = mock_cells.groupby(['Plate']).describe()
    per_stat = {}
    for stat in per_plate.columns.unique(level=1):
        per_stat[stat] = per_plate.xs(stat, level=1, axis=1)

    return per_plate, per_stat, per_well
예제 #2
0
def extract_dist_score(plate_csv, well_type='treated', **kwargs):
    """Compute per-channel Euclidean distances of wells from the mock profile.

    The plate is loaded with ``load_plate_csv`` and averaged per
    ``('Plate', LABEL_FIELD, 'Metadata_broad_sample', 'Image_Metadata_Well')``
    group. For every channel reported by ``list_columns`` (third result),
    plus a synthetic ``'ALL'`` channel holding every channel column, the
    column-wise median of the rows labelled ``'mock'`` (index level 1) is
    taken as the reference, and each row labelled *well_type* is scored by
    the norm of its difference from that reference.

    Args:
        plate_csv: Plate CSV passed to ``load_plate_csv``.
        well_type: Index level-1 label of the rows to score.
        **kwargs: Accepted but not used by this function.

    Returns:
        A DataFrame with one column per channel (including ``'ALL'``) and
        one row per scored well.
    """
    group_keys = [
        'Plate', LABEL_FIELD, 'Metadata_broad_sample', 'Image_Metadata_Well'
    ]
    well_means = load_plate_csv(plate_csv).groupby(by=group_keys).apply(
        lambda group: group.mean())

    _, _, channels = list_columns(well_means)
    channels['ALL'] = [
        col for channel_cols in channels.values() for col in channel_cols
    ]

    # The row selections do not depend on the channel, so compute them once.
    mock_rows = well_means[well_means.index.isin(['mock'], level=1)]
    target_rows = well_means[well_means.index.isin([well_type], level=1)]

    def channel_distances(channel, cols):
        """Distance of every target row from the mock median over *cols*."""
        reference = mock_rows[cols].median()
        distances = target_rows[cols].apply(
            lambda row: np.linalg.norm(row - reference), axis=1)
        distances.name = channel
        return distances

    per_channel = [
        channel_distances(channel, cols) for channel, cols in channels.items()
    ]
    return pd.concat(per_channel, axis=1)
예제 #3
0
def extract_dist_score_norm_before(plate_csv, well_type='treated', **kwargs):
    """Compute per-channel distances from the mock profile on z-scored data.

    The data comes from ``load_pure_zscores``. For every channel reported by
    ``list_columns`` (third result), plus a synthetic ``'ALL'`` channel
    holding every channel column, the column-wise median of the rows labelled
    ``'mock'`` (index level 1) is the reference, and each row labelled
    *well_type* is scored by the Euclidean norm of its difference from it.

    Args:
        plate_csv: Plate CSV passed to ``load_pure_zscores``.
        well_type: Index level-1 label of the rows to score.
        **kwargs: Must contain ``'raw'`` and ``'inter_channel'``, which are
            forwarded to ``load_pure_zscores``.

    Returns:
        A DataFrame with one column per channel (including ``'ALL'``) and
        one row per scored row of the input.

    Raises:
        KeyError: If ``'raw'`` or ``'inter_channel'`` is missing from
            *kwargs*.
    """
    zscores = load_pure_zscores(
        plate_csv, kwargs['raw'], kwargs['inter_channel'])

    _, _, channels = list_columns(zscores)
    channels['ALL'] = [
        col for channel_cols in channels.values() for col in channel_cols
    ]

    # The row selections do not depend on the channel, so compute them once.
    mock_rows = zscores[zscores.index.isin(['mock'], level=1)]
    target_rows = zscores[zscores.index.isin([well_type], level=1)]

    def channel_distances(channel, cols):
        """Distance of every target row from the mock median over *cols*."""
        reference = mock_rows[cols].median()
        distances = target_rows[cols].apply(
            lambda row: np.linalg.norm(row - reference), axis=1)
        distances.name = channel
        return distances

    per_channel = [
        channel_distances(channel, cols) for channel, cols in channels.items()
    ]
    return pd.concat(per_channel, axis=1)
예제 #4
0
def extract_score(plate_csv,
                  by_well=True,
                  by_channel=True,
                  abs_zscore=True,
                  well_type='treated',
                  raw=False,
                  thresh=4,
                  inter_channel=True):
    """Score rows by the fraction of features whose z-score reaches a threshold.

    Args:
        plate_csv: Plate CSV passed to ``load_pure_zscores``.
        by_well: If true, average the result per
            ``('Plate', 'Metadata_broad_sample', 'Image_Metadata_Well')``.
        by_channel: If true, reduce the features to one fraction per channel
            (as reported by ``list_columns``) plus an ``'ALL'`` column, and
            keep only the ``CHANNELS`` columns and ``'ALL'``.
        abs_zscore: If true, take absolute values before thresholding.
        well_type: ``'treated'`` or ``'mock'`` keeps only rows with that
            index level-1 label; any other value keeps every row.
        raw: Forwarded to ``load_pure_zscores``.
        thresh: A value scores 0 when it is below this threshold, else 1.
        inter_channel: Forwarded to ``load_pure_zscores``.

    Returns:
        A DataFrame of scores, grouped per well when *by_well* is true.
    """
    values = load_pure_zscores(plate_csv, raw, inter_channel)

    if well_type in ('treated', 'mock'):
        values = values[values.index.isin([well_type], level=1)]

    if abs_zscore:
        values = values.abs()

    def exceeds(value):
        # Written as "not below" so values that fail the comparison
        # (e.g. NaN) score 1.
        return 0 if value < thresh else 1

    flags = values.apply(lambda column: column.apply(exceeds))

    if not by_channel:
        data = flags
    else:
        _, _, channels = list_columns(flags)
        for channel, cols in channels.items():
            flags[channel] = flags[cols].sum(axis=1) / len(cols)

        every_channel_col = [
            col for channel_cols in channels.values() for col in channel_cols
        ]
        flags["ALL"] = flags[every_channel_col].sum(
            axis=1) / len(every_channel_col)

        data = flags[CHANNELS + ["ALL"]]

    if not by_well:
        return data

    well_keys = ['Plate', 'Metadata_broad_sample', 'Image_Metadata_Well']
    return data.groupby(by=well_keys).apply(lambda group: group.mean())
예제 #5
0
def extract_dist_score_norm_after(plate_csv, well_type='treated', **kwargs):
    """Score wells by their distance from the mock profile, scaled by mock spread.

    The plate is loaded with ``load_plate_csv`` and averaged per
    ``('Plate', LABEL_FIELD, 'Metadata_broad_sample', 'Image_Metadata_Well')``
    group. For every channel reported by ``list_columns`` (third result),
    plus a synthetic ``'ALL'`` channel holding every channel column:

    1. the column-wise median of the ``'mock'`` rows (index level 1) is taken
       as the reference profile;
    2. the Euclidean distance from that profile is computed for the mock
       rows and for the rows labelled *well_type*;
    3. a ``StandardScaler`` fitted on the mock distances standardises the
       distances of the *well_type* rows.

    Args:
        plate_csv: Plate CSV passed to ``load_plate_csv``.
        well_type: Index level-1 label of the rows to score.
        **kwargs: Accepted but not used by this function.

    Returns:
        A DataFrame with one column per channel (including ``'ALL'``) and
        one row per scored well, holding the standardised distances.
    """
    df = load_plate_csv(plate_csv)
    df = df.groupby(by=[
        'Plate', LABEL_FIELD, 'Metadata_broad_sample', 'Image_Metadata_Well'
    ]).apply(lambda g: g.mean())

    def calculate_distance_from(v):
        """Return a function giving the Euclidean distance of a row from *v*."""
        return lambda x: np.linalg.norm(x - v)

    _, _, channels = list_columns(df)
    all_cols = [col for ch_cols in channels.values() for col in ch_cols]
    channels['ALL'] = all_cols

    scores = []
    for channel, cols in channels.items():
        df_mck = df[df.index.isin(['mock'], 1)][cols]
        mck_profile = df_mck.median()
        df_trt = df[df.index.isin([well_type], 1)][cols]

        dist_func = calculate_distance_from(mck_profile)
        # The reference distribution must come from the mock rows themselves:
        # fitting the scaler on the target rows would standardise them
        # against their own mean and spread rather than against mock
        # variability.
        mck_dist = df_mck.apply(dist_func, axis=1)
        del df_mck
        trt_dist = df_trt.apply(dist_func, axis=1)
        del df_trt

        # StandardScaler expects a 2-D array, hence the (-1, 1) reshapes.
        scaler = StandardScaler()
        scaler.fit(mck_dist.to_numpy().reshape(-1, 1))
        del mck_dist

        cur_scores = pd.Series(scaler.transform(trt_dist.to_numpy().reshape(
            -1, 1)).reshape(-1),
                               index=trt_dist.index,
                               name=channel)
        del trt_dist

        scores.append(cur_scores)

    del df

    scores_df = pd.concat(scores, axis=1)
    return scores_df