FEAT_PATH = Path(SAVE_DIR) / 'features.csv'

if not META_PATH.exists() and not FEAT_PATH.exists():
    # compile metadata
    metadata, metadata_path = compile_metadata(aux_dir=AUX_DIR,
                                               imaging_dates=IMAGING_DATES,
                                               n_wells=N_WELLS,
                                               add_well_annotations=(N_WELLS == 96),
                                               from_source_plate=False)

    # compile window summaries
    features, metadata = process_feature_summaries(metadata_path=metadata_path,
                                                   results_dir=RES_DIR,
                                                   compile_day_summaries=True,
                                                   imaging_dates=IMAGING_DATES,
                                                   align_bluelight=False,
                                                   window_summaries=True,
                                                   n_wells=N_WELLS)

    # clean results: drop rows/columns with too many NaNs, cap extreme values,
    # impute remaining NaNs and filter videos with too few skeletons tracked
    features, metadata = clean_summary_results(features, metadata,
                                               feature_columns=None,
                                               nan_threshold_row=NAN_THRESHOLD_ROW,
                                               nan_threshold_col=NAN_THRESHOLD_COL,
                                               max_value_cap=1e15,
                                               imputeNaN=True,
                                               min_nskel_per_video=MIN_NSKEL_PER_VIDEO,
                                               min_nskel_sum=MIN_NSKEL_SUM,
                                               drop_size_related_feats=False)
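# The exists() guard above implies a cache. A minimal sketch of the
# corresponding reload branch (an assumption: the cleaned frames are saved
# to META_PATH / FEAT_PATH as CSV after compilation, and pandas is imported
# as pd elsewhere in the script):
else:
    # reload previously compiled + cleaned results instead of recompiling
    metadata = pd.read_csv(META_PATH)
    features = pd.read_csv(FEAT_PATH)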
args = load_json(args.json)

aux_dir = Path(args.project_dir) / 'AuxiliaryFiles'
results_dir = Path(args.project_dir) / 'Results'

# load metadata
metadata, metadata_path = process_metadata(aux_dir,
                                           imaging_dates=args.dates,
                                           add_well_annotations=args.add_well_annotations,
                                           n_wells=6)

features, metadata = process_feature_summaries(metadata_path,
                                               results_dir,
                                               compile_day_summaries=False,
                                               imaging_dates=args.dates,
                                               align_bluelight=False,
                                               window_summaries=True,
                                               n_wells=6)

# subset results (rows) to drop entries for wells with no 'gene_name' strain info
n = metadata.shape[0]
metadata = metadata.loc[~metadata['gene_name'].isna(), :]
features = features.reindex(metadata.index)
print("%d entries removed with no gene name metadata" % (n - metadata.shape[0]))

# update gene names for mutant strains (relabel the 'BW' background control)
metadata['gene_name'] = [args.control_dict['gene_name'] if s == 'BW' else s
                         for s in metadata['gene_name']]
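# Why reindex(metadata.index) above: dropping metadata rows and re-indexing
# the features frame keeps the two frames row-aligned. A toy illustration
# (hypothetical values; 'speed_50th' stands in for any feature column):
import numpy as np
import pandas as pd

meta = pd.DataFrame({'gene_name': ['fepB', np.nan, 'BW']})
feats = pd.DataFrame({'speed_50th': [210.0, 185.0, 230.0]})

meta = meta.loc[~meta['gene_name'].isna(), :]  # drop rows lacking a gene name
feats = feats.reindex(meta.index)              # features follow the same index
assert (meta.index == feats.index).all()       # frames remain row-aligned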
TEST_NAME = args.test  # str, choose between 'LMM' (if >1 day replicate), 'ANOVA' or 'Kruskal'
# Kruskal tests are performed instead of ANOVA if check_normal is set and the
# data are not normally distributed. If significant features are found,
# pairwise t-tests are then performed.

#%% Compile and clean results

# Process metadata
metadata, metadata_path = process_metadata(aux_dir=AUX_DIR,
                                           imaging_dates=args.dates,
                                           add_well_annotations=args.add_well_annotations)

# Process feature summary results
features, metadata = process_feature_summaries(metadata_path,
                                               RESULTS_DIR,
                                               compile_day_summaries=args.compile_day_summaries,
                                               imaging_dates=args.dates,
                                               align_bluelight=args.align_bluelight)

# Clean: remove data with too many NaNs/zero std and impute remaining NaNs
features, metadata = clean_summary_results(features, metadata,
                                           feature_columns=None,
                                           imputeNaN=args.impute_nans,
                                           nan_threshold=args.nan_threshold,
                                           max_value_cap=args.max_value_cap,
                                           drop_size_related_feats=args.drop_size_features,
                                           norm_feats_only=args.norm_features_only,
                                           percentile_to_use=args.percentile_to_use)

# Load supplementary info + append to metadata
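# A minimal sketch of the test dispatch described by the TEST_NAME comment
# above, using scipy.stats with a Shapiro-Wilk normality check (the project's
# own statistics helpers may differ; the 'LMM' branch would need a mixed
# model, e.g. statsmodels MixedLM, and is omitted here):
from scipy import stats

def choose_group_test(groups, check_normal=True, alpha=0.05):
    """Run one-way ANOVA on per-strain samples, falling back to
    Kruskal-Wallis when check_normal is set and any group fails
    a normality test."""
    if check_normal and any(stats.shapiro(g).pvalue < alpha for g in groups):
        return stats.kruskal(*groups)  # non-parametric alternative
    return stats.f_oneway(*groups)     # parametric one-way ANOVA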
tic = time()  # start timer for the run-time report below (assumed to run at the top of the main block)

parser = argparse.ArgumentParser(description="Compile Keio screen metadata & feature summaries")
parser.add_argument('-j', '--json', help="Path to JSON parameters file for analysis",
                    default=JSON_PARAMETERS_PATH, type=str)
args = parser.parse_args()
args = load_json(args.json)

assert args.project_dir is not None
aux_dir = Path(args.project_dir) / "AuxiliaryFiles"
results_dir = Path(args.project_dir) / "Results"

metadata, metadata_path = process_metadata(aux_dir=aux_dir,
                                           imaging_dates=args.dates,
                                           add_well_annotations=True)

# compile feature summaries using the freshly written metadata file
features, metadata = process_feature_summaries(metadata_path,
                                               results_dir,
                                               compile_day_summaries=False,
                                               imaging_dates=args.dates,
                                               align_bluelight=False)

features, metadata = compile_bluelight_window_results(args)

# Save features to file
features_path = Path(args.save_dir) / 'features.csv'
features.to_csv(features_path, index=False)

# Save metadata to file
metadata_path = Path(args.save_dir) / 'metadata.csv'
metadata.to_csv(metadata_path, index=False)

toc = time()
print("\nDone in %.1f seconds (%.1f minutes)" % (toc - tic, (toc - tic) / 60))
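# For reference, a plausible sketch of the load_json helper used above
# (hypothetical; the project's own implementation may differ). It turns the
# JSON parameters file into an attribute-style namespace so that fields such
# as args.project_dir, args.dates and args.save_dir can be accessed directly:
import json
from types import SimpleNamespace

def load_json(json_path):
    """Read an analysis-parameters JSON file into a namespace."""
    with open(json_path) as fid:
        return SimpleNamespace(**json.load(fid))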