Example #1
0
 # Location of the features CSV ('features.csv') inside SAVE_DIR.
 FEAT_PATH = Path(SAVE_DIR) / 'features.csv'
 
 # Compile everything from scratch only when NEITHER the metadata file nor the
 # features file exists yet.
 # NOTE(review): with `and`, this branch is skipped when just one of the two
 # files is missing -- confirm that is intended (vs. `or`).
 if not META_PATH.exists() and not FEAT_PATH.exists():
 
     # Compile metadata from AUX_DIR for the given imaging dates.
     # Well annotations are requested only when N_WELLS == 96.
     metadata, metadata_path = compile_metadata(aux_dir=AUX_DIR, 
                                                imaging_dates=IMAGING_DATES, 
                                                n_wells=N_WELLS,
                                                add_well_annotations=N_WELLS==96,
                                                from_source_plate=False)
     
     # Compile window summaries from RES_DIR, using the metadata path returned
     # above. Day summaries are compiled too; blue-light alignment is disabled.
     features, metadata = process_feature_summaries(metadata_path=metadata_path, 
                                                    results_dir=RES_DIR, 
                                                    compile_day_summaries=True,
                                                    imaging_dates=IMAGING_DATES, 
                                                    align_bluelight=False,
                                                    window_summaries=True,
                                                    n_wells=N_WELLS)
     
     # clean results
     features, metadata = clean_summary_results(features, 
                                                metadata,
                                                feature_columns=None,
                                                nan_threshold_row=NAN_THRESHOLD_ROW,
                                                nan_threshold_col=NAN_THRESHOLD_COL,
                                                max_value_cap=1e15,
                                                imputeNaN=True,
                                                min_nskel_per_video=MIN_NSKEL_PER_VIDEO,
                                                min_nskel_sum=MIN_NSKEL_SUM,
                                                drop_size_related_feats=False,
Example #2
0
    # Swap the parsed command-line namespace for the parameters loaded from the JSON file
    args = load_json(args.json)

    # Both input folders sit directly under the project directory
    project_root = Path(args.project_dir)
    aux_dir = project_root / 'AuxiliaryFiles'
    results_dir = project_root / 'Results'

    # Settings handed unchanged to both compilation steps (n_wells is fixed at 6)
    shared_kwargs = {'imaging_dates': args.dates, 'n_wells': 6}

    # Compile the metadata; the call returns it together with metadata_path
    metadata, metadata_path = process_metadata(
        aux_dir,
        add_well_annotations=args.add_well_annotations,
        **shared_kwargs)

    # Compile window summaries (no day-summary compilation, no blue-light alignment)
    features, metadata = process_feature_summaries(
        metadata_path,
        results_dir,
        compile_day_summaries=False,
        align_bluelight=False,
        window_summaries=True,
        **shared_kwargs)

    # Keep only the rows whose 'gene_name' metadata is present, then re-align
    # the feature rows to the surviving metadata index
    n = len(metadata)
    has_gene_name = metadata['gene_name'].notna()
    metadata = metadata.loc[has_gene_name, :]
    features = features.reindex(metadata.index)

    # Report how many rows were dropped by the filter
    n_removed = n - len(metadata)
    print("%d entries removed with no gene name metadata" % n_removed)

    # update gene names for mutant strains
    metadata['gene_name'] = [
        args.control_dict['gene_name'] if s == 'BW' else s
        for s in metadata['gene_name']
    TEST_NAME = args.test # str: one of 'LMM' (if >1 day replicate), 'ANOVA' or 'Kruskal'
    # Kruskal tests are performed instead of ANOVA if check_normal is set and the
    # data is not normally distributed.
    # If significant features are found, pairwise t-tests are performed.

    #%% Compile and clean results

    # Compile the metadata from the auxiliary directory for the requested dates
    metadata, metadata_path = process_metadata(
        aux_dir=AUX_DIR,
        imaging_dates=args.dates,
        add_well_annotations=args.add_well_annotations)

    # Compile the feature summaries found under the results directory
    features, metadata = process_feature_summaries(
        metadata_path,
        RESULTS_DIR,
        compile_day_summaries=args.compile_day_summaries,
        imaging_dates=args.dates,
        align_bluelight=args.align_bluelight)

    # Cleaning options, all taken from the analysis parameters in args
    cleaning_options = {
        'feature_columns': None,
        'imputeNaN': args.impute_nans,
        'nan_threshold': args.nan_threshold,
        'max_value_cap': args.max_value_cap,
        'drop_size_related_feats': args.drop_size_features,
        'norm_feats_only': args.norm_features_only,
        'percentile_to_use': args.percentile_to_use,
    }

    # Clean the compiled features and metadata with those options
    features, metadata = clean_summary_results(features, metadata, **cleaning_options)
    
    # Load supplementary info + append to metadata
    # Command-line interface: a single optional argument naming the JSON parameters file
    parser = argparse.ArgumentParser(
        description="Compile Keio screen metadata & feature summaries")
    parser.add_argument(
        '-j', '--json',
        type=str,
        default=JSON_PARAMETERS_PATH,
        help="Path to JSON parameters file for analysis")

    # Parse the command line, then load the analysis parameters from the chosen JSON file
    args = load_json(parser.parse_args().json)
    
    # The project directory is required: both input folders are derived from it
    assert args.project_dir is not None, "args.project_dir must not be None"
    aux_dir = Path(args.project_dir) / "AuxiliaryFiles"
    results_dir = Path(args.project_dir) / "Results"
    
    # Compile the metadata for the requested imaging dates, with well annotations
    metadata, metadata_path = process_metadata(aux_dir=aux_dir,
                                               imaging_dates=args.dates,
                                               add_well_annotations=True)
    
    # Compile the feature summaries. The result is unpacked as (features, metadata),
    # the same order used by every other process_feature_summaries call; the
    # previous (metadata, features) order swapped the two tables.
    # NOTE(review): METADATA_PATH is a module-level name, while process_metadata
    # has just returned metadata_path -- confirm both refer to the same file.
    features, metadata = process_feature_summaries(METADATA_PATH, results_dir, 
                                                   compile_day_summaries=False, 
                                                   imaging_dates=args.dates, 
                                                   align_bluelight=False)
    
    # Obtain the features and metadata tables from compile_bluelight_window_results
    features, metadata = compile_bluelight_window_results(args)

    # Write both tables as CSV files in the save directory, without the index column
    save_dir = Path(args.save_dir)
    features_path = save_dir / 'features.csv'
    metadata_path = save_dir / 'metadata.csv'
    for table, csv_path in ((features, features_path), (metadata, metadata_path)):
        table.to_csv(csv_path, index=False)

    # Report the time elapsed since tic, in seconds and in minutes
    toc = time()
    elapsed = toc - tic
    print("\nDone in %.1f seconds (%.1f minutes)" % (elapsed, elapsed / 60))