def get_classes_from_stimulus_list(stimulus_list):
    "use `stimulus.metadata.class` as (string) key, and resolve to a number"
    class_resolver = dict()
    for s in stimulus_list:
        metadata = s['stimulus']['metadata']
        if "class" in metadata:
            class_resolver[str(metadata['class'])] = str(
                metadata['classLabels'])
    label = glia.get_value(class_resolver)
    for i, k in enumerate(class_resolver.keys()):
        # we only support one classLabel type
        assert class_resolver[k] == label
        class_resolver[k] = i
    return class_resolver
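
# Illustrative sketch (not part of the original module): with a hypothetical
# stimulus_list where two stimuli carry `class` metadata sharing the same
# `classLabels` value, and assuming glia.get_value returns an arbitrary value
# from the dict, the resolver maps each class string to an integer index:
#
#   stimulus_list = [
#       {'stimulus': {'metadata': {'class': 'A', 'classLabels': 'letters'}}},
#       {'stimulus': {'metadata': {'class': 'B', 'classLabels': 'letters'}}},
#   ]
#   get_classes_from_stimulus_list(stimulus_list)  # -> {'A': 0, 'B': 1}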
def save_grating_npz(units, stimulus_list, name, append, group_by,
                     sinusoid=False):
    "Psychophysics discrimination grating 0.2.0"
    print("Saving grating NPZ file.")

    if sinusoid:
        stimulus_type = "SINUSOIDAL_GRATING"
    else:
        stimulus_type = 'GRATING'
    get_gratings = glia.compose(
        partial(glia.create_experiments,
                stimulus_list=stimulus_list,
                append_lifespan=append),
        glia.f_filter(lambda x: x['stimulusType'] == stimulus_type),
        partial(glia.group_by, key=group_by),
        glia.f_map(partial(glia.group_by, key=lambda x: x["width"])),
        glia.f_map(
            glia.f_map(
                partial(glia.group_by,
                        key=lambda x: x["metadata"]["cohort"]))))
    gratings = get_gratings(units)
    max_duration = 0.0
    for condition, sizes in gratings.items():
        for size, cohorts in sizes.items():
            for cohort, experiments in cohorts.items():
                max_duration = max(max_duration, experiments[0]['lifespan'])
    max_duration += append
    conditions = sorted(list(gratings.keys()))
    print("Conditions:", name, conditions)
    nconditions = len(conditions)
    example_condition = glia.get_value(gratings)
    sizes = sorted(list(example_condition.keys()))
    print("Sizes:", sizes)
    nsizes = len(sizes)
    example_size = glia.get_value(example_condition)
    ncohorts = len(example_size)
    # print(list(gratings.values()))
    d = int(np.ceil(max_duration * 1000))  # 1ms bins
    tvt = glia.tvt_by_percentage(ncohorts, 60, 40, 0)
    # 2 per cohort
    training_data = np.full((nconditions, nsizes, tvt.training * 2, d,
                             Unit.nrow, Unit.ncol, Unit.nunit),
                            0,
                            dtype='int8')
    training_target = np.full((nconditions, nsizes, tvt.training * 2),
                              0,
                              dtype='int8')
    validation_data = np.full((nconditions, nsizes, tvt.validation * 2, d,
                               Unit.nrow, Unit.ncol, Unit.nunit),
                              0,
                              dtype='int8')
    validation_target = np.full((nconditions, nsizes, tvt.validation * 2),
                                0,
                                dtype='int8')

    condition_map = {c: i for i, c in enumerate(conditions)}
    size_map = {s: i for i, s in enumerate(sizes)}
    for condition, sizes in gratings.items():
        for size, cohorts in sizes.items():
            X = glia.f_split_dict(tvt)(cohorts)

            td, tt = glia.experiments_to_ndarrays(glia.training_cohorts(X),
                                                  get_grating_class_from_stim,
                                                  append)
            missing_duration = d - td.shape[1]
            pad_td = np.pad(td, ((0, 0), (0, missing_duration), (0, 0),
                                 (0, 0), (0, 0)),
                            mode='constant')
            condition_index = condition_map[condition]
            size_index = size_map[size]
            training_data[condition_index, size_index] = pad_td
            training_target[condition_index, size_index] = tt

            td, tt = glia.experiments_to_ndarrays(glia.validation_cohorts(X),
                                                  get_grating_class_from_stim,
                                                  append)
            pad_td = np.pad(td, ((0, 0), (0, missing_duration), (0, 0),
                                 (0, 0), (0, 0)),
                            mode='constant')
            validation_data[condition_index, size_index] = pad_td
            validation_target[condition_index, size_index] = tt

    print('saving to ', name)
    np.savez(name,
             training_data=training_data,
             training_target=training_target,
             validation_data=validation_data,
             validation_target=validation_target)
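
# Usage sketch (assumption, not part of the original module): the NPZ written
# by save_grating_npz can be read back with numpy. Keys match the np.savez
# call above; the data arrays are int8 with shape
# (condition, size, example, time_ms, Unit.nrow, Unit.ncol, Unit.nunit).
# Note that np.savez appends ".npz" when `name` has no extension.
#
#   import numpy as np
#   archive = np.load("gratings.npz")      # hypothetical filename
#   X_train = archive["training_data"]
#   y_train = archive["training_target"]
#   X_val = archive["validation_data"]
#   y_val = archive["validation_target"]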
def save_checkerboard_flicker_npz(units, stimulus_list, name, append,
                                  group_by, quad=False):
    "Psychophysics discrimination checkerboard 0.2.0"
    print("Saving checkerboard NPZ file.")
    get_checkers = glia.compose(
        partial(
            glia.create_experiments,
            progress=True,
            append_lifespan=append,
            # stimulus_list=stimulus_list,append_lifespan=0.5),
            stimulus_list=stimulus_list),
        partial(glia.group_by, key=lambda x: x["metadata"]["group"]),
        glia.group_dict_to_list,
        glia.f_filter(group_contains_checkerboard),
        glia.f_map(
            glia.f_filter(lambda x: x['stimulusType'] == 'CHECKERBOARD')),
        glia.f_map(glia.merge_experiments),
        partial(glia.group_by, key=group_by),
        glia.f_map(partial(glia.group_by, key=lambda x: x["size"])),
        glia.f_map(
            glia.f_map(
                partial(glia.group_by,
                        key=lambda x: x["metadata"]["cohort"]))))
    checkers = get_checkers(units)
    max_duration = 0.0
    for condition, sizes in checkers.items():
        for size, cohorts in sizes.items():
            for cohort, experiments in cohorts.items():
                max_duration = max(max_duration, experiments[0]['lifespan'])
    max_duration += append
    print(f"max_duration: {max_duration}")
    conditions = sorted(list(checkers.keys()))
    print("Conditions:", name, conditions)
    nconditions = len(conditions)
    example_condition = glia.get_value(checkers)
    sizes = sorted(list(example_condition.keys()))
    nsizes = len(sizes)
    # TODO remove
    if max_duration < 9:
        print(example_condition)
    example_size = glia.get_value(example_condition)
    ncohorts = len(example_size)
    # print(list(checkers.values()))
    d = int(np.ceil(max_duration * 1000))  # 1ms bins
    tvt = glia.tvt_by_percentage(ncohorts, 60, 40, 0)
    logger.info(f"{tvt}, {ncohorts}")
    # (TODO?) 2 dims for first checkerboard and second checkerboard
    # 4 per cohort
    if quad:
        ntraining = tvt.training * 4
        nvalid = tvt.validation * 4
    else:
        ntraining = tvt.training * 2
        nvalid = tvt.validation * 2
    training_data = np.full(
        (nconditions, nsizes, ntraining, d, Unit.nrow, Unit.ncol, Unit.nunit),
        0,
        dtype='int8')
    training_target = np.full((nconditions, nsizes, ntraining),
                              0,
                              dtype='int8')
    validation_data = np.full(
        (nconditions, nsizes, nvalid, d, Unit.nrow, Unit.ncol, Unit.nunit),
        0,
        dtype='int8')
    validation_target = np.full((nconditions, nsizes, nvalid),
                                0,
                                dtype='int8')
    # test_data = np.full((nsizes,tvt.test,d,nunits),0,dtype='int8')
    # test_target = np.full((nsizes,tvt.test),0,dtype='int8')

    if quad:
        get_class = get_checker_quad_discrimination_class
    else:
        get_class = get_checker_discrimination_class

    condition_map = {c: i for i, c in enumerate(conditions)}
    size_map = {s: i for i, s in enumerate(sizes)}
    for condition, sizes in checkers.items():
        for size, cohorts in sizes.items():
            X = glia.f_split_dict(tvt)(cohorts)

            td, tt = glia.experiments_to_ndarrays(glia.training_cohorts(X),
                                                  get_class, append)
            logger.info(td.shape)
            missing_duration = d - td.shape[1]
            pad_td = np.pad(td, ((0, 0), (0, missing_duration), (0, 0),
                                 (0, 0), (0, 0)),
                            mode='constant')
            condition_index = condition_map[condition]
            size_index = size_map[size]
            training_data[condition_index, size_index] = pad_td
            training_target[condition_index, size_index] = tt

            td, tt = glia.experiments_to_ndarrays(glia.validation_cohorts(X),
                                                  get_class, append)
            pad_td = np.pad(td, ((0, 0), (0, missing_duration), (0, 0),
                                 (0, 0), (0, 0)),
                            mode='constant')
            validation_data[condition_index, size_index] = pad_td
            validation_target[condition_index, size_index] = tt

    print('saving to ', name)
    np.savez(name,
             training_data=training_data,
             training_target=training_target,
             validation_data=validation_data,
             validation_target=validation_target)
def save_images_h5(units, stimulus_list, name, frame_log, video_file, append):
    """Assumes each group is three stimuli with image in second position.

    Concatenate second stimuli with first 0.5s of third stimuli."""
    # open first so if there's a problem we don't waste time
    compression_level = 3
    dset_filter = tables.filters.Filters(complevel=compression_level,
                                         complib='blosc:zstd')
    with tables.open_file(name + ".h5", 'w') as h5:
        class_resolver = get_classes_from_stimulus_list(stimulus_list)
        nclasses = len(class_resolver)
        frames, image_classes = glia.get_images_from_vid(
            stimulus_list, frame_log, video_file)
        image_class_num = list(
            map(lambda x: class_resolver[str(x)], image_classes))
        idx_sorted_order = np.argsort(image_class_num)

        # save mapping of class_num target to class metadata
        # this way h5.root.image_classes[n] will give the class metadata string
        logger.info("create class_resolver with max string of 256")
        resolver = h5.create_carray(h5.root, "image_classes",
                                    tables.StringAtom(itemsize=256),
                                    (nclasses, ))
        img_class_array = np.array(image_classes,
                                   dtype="S256")[idx_sorted_order]
        for i, image_class in enumerate(img_class_array):
            resolver[i] = image_class

        atom = tables.Atom.from_dtype(frames[0].dtype)
        images = h5.create_carray(h5.root,
                                  "images",
                                  atom, (nclasses, *frames[0].shape),
                                  filters=dset_filter)
        frames = np.array(frames)
        nFrames = len(frames)
        for i, idx in enumerate(idx_sorted_order):
            if idx >= nFrames:
                logger.warning(
                    f"skipping class {image_classes[idx]} as no accompanying frame. This should only occur if experiment stopped early."
                )
                continue
            images[i] = frames[idx]
        print("finished saving images")

        get_image_responses = glia.compose(
            # returns a list
            partial(glia.create_experiments,
                    stimulus_list=stimulus_list,
                    progress=True,
                    append_lifespan=append),
            partial(glia.group_by, key=lambda x: x["metadata"]["group"]),
            glia.group_dict_to_list,
            glia.f_filter(partial(glia.group_contains, "IMAGE")),
            # truncate to 0.5s
            glia.f_map(lambda x: [x[1], truncate(x[2], 0.5)]),
            glia.f_map(glia.merge_experiments),
            partial(glia.group_by, key=lambda x: x["metadata"]["cohort"]),
            # glia.f_map(f_flatten)
        )
        image_responses = get_image_responses(units)
        ncohorts = len(image_responses)
        ex_cohort = glia.get_value(image_responses)
        images_per_cohort = len(ex_cohort)
        print("images_per_cohort", images_per_cohort)
        duration = ex_cohort[0]["lifespan"]
        d = int(np.ceil(duration * 1000))  # 1ms bins
        logger.info(f"ncohorts: {ncohorts}")
        # import pdb; pdb.set_trace()
        logger.info(f"nclasses: {nclasses}")
        if nclasses < 256:
            class_dtype = np.dtype('uint8')
        else:
            class_dtype = np.dtype('uint16')
        class_resolver_func = lambda c: class_resolver[str(c)]

        # determine shape
        experiments = glia.flatten_group_dict(image_responses)
        nE = len(experiments)
        d = int(np.ceil(duration * 1000))  # 1ms bins
        data_shape = (nE, d, Unit.nrow, Unit.ncol, Unit.nunit)

        print(f"writing to {name}.h5 with zstd compression...")
        data = h5.create_carray("/",
                                "data",
                                tables.Atom.from_dtype(np.dtype('uint8')),
                                shape=data_shape,
                                filters=dset_filter)
        target = h5.create_carray("/",
                                  "target",
                                  tables.Atom.from_dtype(class_dtype),
                                  shape=(nE, ),
                                  filters=dset_filter)
        glia.experiments_to_h5(experiments,
                               data,
                               target,
                               partial(get_image_class_from_stim,
                                       class_resolver=class_resolver_func),
                               append,
                               class_dtype=class_dtype)
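
# Reading sketch (assumption, not part of the original module): the HDF5 file
# written by save_images_h5 exposes the arrays created above under /images,
# /image_classes, /data, and /target, so a consumer might do:
#
#   import tables
#   with tables.open_file("recording.h5", 'r') as h5:  # hypothetical filename
#       images = h5.root.images[:]         # (nclasses, *frame_shape)
#       labels = h5.root.image_classes[:]  # class metadata strings (bytes)
#       data = h5.root.data[:]             # (nE, time_ms, nrow, ncol, nunit)
#       target = h5.root.target[:]         # class index per experiment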
def save_acuity_image_npz(units, stimulus_list, name, append):
    "Assumes metadata includes a parameter to group by, as well as a blank image"
    get_letters = glia.compose(
        partial(glia.create_experiments,
                stimulus_list=stimulus_list,
                progress=True,
                append_lifespan=append),
        partial(glia.group_by, key=lambda x: x["metadata"]["group"]),
        glia.group_dict_to_list,
        glia.f_filter(partial(glia.group_contains, "IMAGE")),
        glia.f_map(lambda x: x[0:2]),
        partial(glia.group_by, key=lambda x: x[1]["metadata"]["parameter"]),
        glia.f_map(
            partial(glia.group_by, key=lambda x: x[1]["metadata"]["cohort"])),
        glia.f_map(glia.f_map(f_flatten)),
        glia.f_map(glia.f_map(partial(balance_blanks, key='image'))))
    letters = get_letters(units)
    sizes = sorted(list(letters.keys()))
    nsizes = len(sizes)
    ncohorts = len(list(letters.values())[0])
    ex_letters = glia.get_value(list(letters.values())[0])
    nletters = len(ex_letters)
    print("nletters", nletters)
    duration = ex_letters[0]["lifespan"]
    # small hack to fix bug in letters 0.2.0
    letter_duration = ex_letters[1]['lifespan']
    if duration != letter_duration:
        new_letters = {}
        for size, cohorts in letters.items():
            new_letters[size] = {}
            for cohort, stimuli in cohorts.items():
                new_letters[size][cohort] = list(
                    map(lambda s: truncate(s, letter_duration), stimuli))
        letters = new_letters

    d = int(np.ceil(duration * 1000))  # 1ms bins
    nunits = len(units.keys())
    tvt = glia.tvt_by_percentage(ncohorts, 60, 40, 0)
    logger.info(f"{tvt}, ncohorts: {ncohorts}")
    experiments_per_cohort = 11
    training_data = np.full((nsizes, tvt.training * experiments_per_cohort, d,
                             Unit.nrow, Unit.ncol, Unit.nunit),
                            0,
                            dtype='int8')
    training_target = np.full((nsizes, tvt.training * experiments_per_cohort),
                              0,
                              dtype='int8')
    validation_data = np.full((nsizes,
                               tvt.validation * experiments_per_cohort, d,
                               Unit.nrow, Unit.ncol, Unit.nunit),
                              0,
                              dtype='int8')
    validation_target = np.full(
        (nsizes, tvt.validation * experiments_per_cohort), 0, dtype='int8')

    size_map = {s: i for i, s in enumerate(sizes)}
    for size, cohorts in letters.items():
        X = glia.f_split_dict(tvt)(cohorts)
        logger.info(f"ncohorts: {len(cohorts)}")

        td, tt = glia.experiments_to_ndarrays(glia.training_cohorts(X),
                                              acuity_image_class, append)
        logger.info(td.shape)
        missing_duration = d - td.shape[1]
        pad_td = np.pad(td, ((0, 0), (0, missing_duration), (0, 0), (0, 0),
                             (0, 0)),
                        mode='constant')
        size_index = size_map[size]
        training_data[size_index] = pad_td
        training_target[size_index] = tt

        td, tt = glia.experiments_to_ndarrays(glia.validation_cohorts(X),
                                              acuity_image_class, append)
        pad_td = np.pad(td, ((0, 0), (0, missing_duration), (0, 0), (0, 0),
                             (0, 0)),
                        mode='constant')
        validation_data[size_index] = pad_td
        validation_target[size_index] = tt

    np.savez(name,
             training_data=training_data,
             training_target=training_target,
             validation_data=validation_data,
             validation_target=validation_target)
def save_letter_npz(units, stimulus_list, name, append):
    print(
        "Saving letter NPZ file. Warning: not including Off response--performance can be improved!"
    )

    # TODO use merge_experiment
    # TODO add TEST!!!
    get_letters = glia.compose(
        partial(glia.create_experiments,
                stimulus_list=stimulus_list,
                progress=True,
                append_lifespan=append),
        partial(glia.group_by, key=lambda x: x["metadata"]["group"]),
        glia.group_dict_to_list,
        glia.f_filter(group_contains_letter),
        glia.f_map(lambda x: x[0:2]),
        partial(glia.group_by, key=lambda x: x[1]["size"]),
        glia.f_map(
            partial(glia.group_by, key=lambda x: x[1]["metadata"]["cohort"])),
        glia.f_map(glia.f_map(f_flatten)),
        glia.f_map(glia.f_map(balance_blanks)))
    letters = get_letters(units)
    sizes = sorted(list(letters.keys()))
    nsizes = len(sizes)
    ncohorts = len(list(letters.values())[0])
    ex_letters = glia.get_value(list(letters.values())[0])
    nletters = len(ex_letters)
    print("nletters", nletters)
    duration = ex_letters[0]["lifespan"]
    d = int(np.ceil(duration * 1000))  # 1ms bins
    nunits = len(units.keys())
    tvt = glia.tvt_by_percentage(ncohorts, 60, 40, 0)
    logger.info(f"{tvt}, ncohorts: {ncohorts}")
    experiments_per_cohort = 11
    training_data = np.full((nsizes, tvt.training * experiments_per_cohort, d,
                             Unit.nrow, Unit.ncol, Unit.nunit),
                            0,
                            dtype='int8')
    training_target = np.full((nsizes, tvt.training * experiments_per_cohort),
                              0,
                              dtype='int8')
    validation_data = np.full((nsizes,
                               tvt.validation * experiments_per_cohort, d,
                               Unit.nrow, Unit.ncol, Unit.nunit),
                              0,
                              dtype='int8')
    validation_target = np.full(
        (nsizes, tvt.validation * experiments_per_cohort), 0, dtype='int8')

    size_map = {s: i for i, s in enumerate(sizes)}
    for size, cohorts in letters.items():
        X = glia.f_split_dict(tvt)(cohorts)
        logger.info(f"ncohorts: {len(cohorts)}")

        td, tt = glia.experiments_to_ndarrays(glia.training_cohorts(X),
                                              letter_class, append)
        logger.info(td.shape)
        missing_duration = d - td.shape[1]
        pad_td = np.pad(td, ((0, 0), (0, missing_duration), (0, 0), (0, 0),
                             (0, 0)),
                        mode='constant')
        size_index = size_map[size]
        training_data[size_index] = pad_td
        training_target[size_index] = tt

        td, tt = glia.experiments_to_ndarrays(glia.validation_cohorts(X),
                                              letter_class, append)
        pad_td = np.pad(td, ((0, 0), (0, missing_duration), (0, 0), (0, 0),
                             (0, 0)),
                        mode='constant')
        validation_data[size_index] = pad_td
        validation_target[size_index] = tt

    np.savez(name,
             training_data=training_data,
             training_target=training_target,
             validation_data=validation_data,
             validation_target=validation_target)