def run_chirp_feature_vector_extraction(output_dir, output_code,
                                        include_failed_cells, specimen_ids,
                                        chirp_stimulus_codes,
                                        data_source="lims",
                                        run_parallel=True):
    logging.info("Number of specimens to process: {:d}".format(
        len(specimen_ids)))

    # Include and name chirp stimulus codes in ontology
    ontology_data = ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE)
    edited_ontology_data = edit_ontology_data(
        ontology_data, chirp_stimulus_codes,
        new_name_tag="Chirp", new_core_tag="Core 2")
    ontology = StimulusOntology(edited_ontology_data)

    get_data_partial = partial(data_for_specimen_id,
                               data_source=data_source,
                               ontology=ontology)

    if run_parallel:
        pool = Pool()
        results = pool.map(get_data_partial, specimen_ids)
    else:
        results = map(get_data_partial, specimen_ids)

    used_ids, results, error_set = su.filter_results(specimen_ids, results)

    logging.info("Finished with {:d} processed specimens".format(
        len(used_ids)))

    results_dict = su.organize_results(used_ids, results)
    su.save_results_to_npy(used_ids, results_dict, output_dir, output_code)
    su.save_errors_to_json(error_set, output_dir, output_code)
    logging.info("Finished saving")
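
# Hedged usage sketch (illustration only, not part of the original module): one
# way the chirp pipeline above might be invoked directly from Python. The paths,
# output code, specimen IDs, and stimulus code below are placeholders, not
# values taken from this repository.
def _example_run_chirp_extraction():
    example_ids = [123456, 234567]  # hypothetical LIMS specimen IDs
    run_chirp_feature_vector_extraction(
        output_dir="/tmp/chirp_fv",              # hypothetical output directory
        output_code="chirp_demo",                # hypothetical tag used in output file names
        include_failed_cells=False,
        specimen_ids=example_ids,
        chirp_stimulus_codes=["EXAMPLE_CHIRP_CODE"],  # assumption: placeholder stimulus code
        data_source="lims",
        run_parallel=False,                      # serial map is easier to debug
    )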
def run_feature_vector_extraction(output_dir, data_source, output_code,
                                  project, output_file_type, sweep_qc_option,
                                  include_failed_cells, run_parallel,
                                  ap_window_length, ids=None,
                                  file_list=None, **kwargs):
    """
    Extract feature vectors from a list of cells and save the result to the output file(s)

    Parameters
    ----------
    output_dir : str
        see CollectFeatureVectorParameters input schema for details
    data_source : str
        see CollectFeatureVectorParameters input schema for details
    output_code : str
        see CollectFeatureVectorParameters input schema for details
    project : str
        see CollectFeatureVectorParameters input schema for details
    output_file_type : str
        see CollectFeatureVectorParameters input schema for details
    sweep_qc_option : str
        see CollectFeatureVectorParameters input schema for details
    include_failed_cells : bool
        see CollectFeatureVectorParameters input schema for details
    run_parallel : bool
        see CollectFeatureVectorParameters input schema for details
    ap_window_length : float
        see CollectFeatureVectorParameters input schema for details
    ids : list of int, optional
        specimen IDs of the cells to process
    file_list : list of str, optional
        NWB file names
    kwargs

    Returns
    -------
    """
    if ids is not None:
        specimen_ids = ids
    elif data_source == "lims":
        specimen_ids = lq.project_specimen_ids(
            project, passed_only=not include_failed_cells)
    else:
        logging.error("Must specify input file if data source is not LIMS")

    if output_file_type == "h5":
        # Check that we can access the specified file before processing everything
        # ("a" creates the file if it does not exist yet)
        h5_file = h5py.File(
            os.path.join(output_dir, "fv_{}.h5".format(output_code)), "a")
        h5_file.close()

    ontology = StimulusOntology(
        ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))

    logging.info("Number of specimens to process: {:d}".format(
        len(specimen_ids)))

    get_data_partial = partial(data_for_specimen_id,
                               sweep_qc_option=sweep_qc_option,
                               data_source=data_source,
                               ontology=ontology,
                               ap_window_length=ap_window_length,
                               file_list=file_list)

    if run_parallel:
        pool = Pool()
        results = pool.map(get_data_partial, specimen_ids)
    else:
        results = map(get_data_partial, specimen_ids)

    used_ids, results, error_set = su.filter_results(specimen_ids, results)

    logging.info("Finished with {:d} processed specimens".format(
        len(used_ids)))

    results_dict = su.organize_results(used_ids, results)

    if output_file_type == "h5":
        su.save_results_to_h5(used_ids, results_dict, output_dir, output_code)
    elif output_file_type == "npy":
        su.save_results_to_npy(used_ids, results_dict, output_dir, output_code)
    else:
        raise ValueError(
            "Unknown output_file_type option {} (allowed values are h5 and npy)"
            .format(output_file_type))

    su.save_errors_to_json(error_set, output_dir, output_code)
    logging.info("Finished saving")
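
# Hedged usage sketch (illustration only, with placeholder values): calling
# run_feature_vector_extraction directly with an explicit ID list, which
# bypasses the LIMS project query above. Every literal below is an assumption,
# not a value taken from this repository.
def _example_run_feature_vector_extraction():
    run_feature_vector_extraction(
        output_dir="/tmp/fv_output",       # hypothetical output directory
        data_source="lims",
        output_code="demo",                # used in output file names, e.g. fv_demo.h5
        project="EXAMPLE_PROJECT",         # assumption: placeholder; unused when ids are given
        output_file_type="npy",            # "h5" or "npy"
        sweep_qc_option="lims-passed",     # assumption: example option; see the input schema
        include_failed_cells=False,
        run_parallel=False,
        ap_window_length=0.006,            # assumption: example AP window length, in seconds
        ids=[123456],                      # hypothetical specimen ID
    )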
def main():
    module = ags.ArgSchemaParser(schema_type=CollectFeatureVectorParameters)

    if module.args["input"]:
        # input file should be a list of IDs, one per line
        with open(module.args["input"], "r") as f:
            ids = [int(line.strip("\n")) for line in f]
        run_feature_vector_extraction(ids=ids, **module.args)
    else:
        run_feature_vector_extraction(**module.args)
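
# Hedged note (illustration, not from the original module): main() expects the
# "input" argument to point at a plain-text file with one integer specimen ID
# per line, e.g.
#
#     123456
#     234567
#
# Because the module uses argschema, the schema fields can typically also be
# supplied on the command line; a hypothetical invocation (the script name and
# flags below are assumptions) might look like:
#
#     python run_feature_vector_extraction.py --input ids.txt \
#         --output_dir /tmp/fv_output --output_code demo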