def download_all_dlc():
    """Download DLC traces and camera time stamps for all repeated-site sessions.

    Iterates over the session ids returned by ``get_repeated_sites()`` and
    loads the 'camera.dlc' and 'camera.times' dataset types through ONE.
    This is best effort: a session that does not list both dataset types, or
    for which listing/loading fails, is reported and skipped.
    """
    eids = get_repeated_sites()
    one = ONE()
    dataset_types = ['camera.dlc', 'camera.times']

    for eid in eids:
        try:
            a = one.list(eid, 'dataset-types')
            # Compare the dataset types listed for the session against the
            # ones we need; merely testing the names for truthiness would
            # accept any session.
            available = {x['dataset_type'] for x in a}
            if not all(u in available for u in dataset_types):
                print('not all data available')
                continue
            one.load(eid, dataset_types=dataset_types)
        except Exception as e:
            # Keep going with the next session, but say why this one failed
            # (KeyboardInterrupt is deliberately not caught).
            print(eid, 'skipped:', e)
            continue
def get_example_images(eid):
    """Save one example frame per camera ('body', 'left', 'right') of a session.

    For every video type the frame with index ``20 * 60 * frts[video_type]``
    is streamed from the session's raw mp4 and written to
    '/home/mic/reproducible_dlc/example_images/<eid>_<video_type>.npy'.
    Failures for one video type are printed and do not stop the others.

    eid: session id whose videos are sampled
    """
    # presumably nominal frame rates in Hz, which would make the sampled
    # frame the one ~20 minutes into the recording -- TODO confirm
    frts = {'body': 30, 'left': 60, 'right': 150}

    one = ONE()

    for video_type in frts:
        frame_idx = [20 * 60 * frts[video_type]]
        try:
            r = one.list(eid, 'dataset_types')
            # first dataset record whose name matches this camera's raw video
            recs = [
                x for x in r
                if f'{video_type}Camera.raw.mp4' in x['name']
            ][0]['file_records']
            # first file record that actually has a download URL
            video_path = [
                x['data_url'] for x in recs if x['data_url'] is not None
            ][0]

            frames = get_video_frames_preload(video_path,
                                              frame_idx,
                                              mask=np.s_[:, :, 0])
            np.save(
                '/home/mic/reproducible_dlc/example_images/'
                f'{eid}_{video_type}.npy', frames)
            print(eid, video_type, 'done')
        except Exception as e:
            # best effort per camera; report the cause instead of hiding it
            print(eid, video_type, 'error', e)
            continue
def get_ME(eid, video_type):
    """Load ROI motion energy and camera time stamps for one camera of a session.

    Downloads the 'camera.ROIMotionEnergy' and 'camera.times' dataset types
    through ONE and reads them from the session's local 'alf' folder.

    eid: session id
    video_type: camera name used in the file names, e.g. 'left'

    Returns (Times, ME): the camera object's 'times' entry and the array
    stored in '<video_type>Camera.ROIMotionEnergy.npy'.
    """
    one = ONE()
    dataset_types = ['camera.ROIMotionEnergy', 'camera.times']

    one.load(eid, dataset_types=dataset_types)
    alf_path = one.path_from_eid(eid) / 'alf'

    cam0 = alf.io.load_object(alf_path,
                              '%sCamera' % video_type,
                              namespace='ibl')
    ME = np.load(alf_path / f'{video_type}Camera.ROIMotionEnergy.npy')
    Times = cam0['times']

    return Times, ME
def get_dlc_XYs(eid, video_type):
    """Load DLC-tracked point coordinates and camera time stamps for a session.

    Downloads the 'camera.dlc' and 'camera.times' dataset types through ONE
    and reads the camera object from the session's local 'alf' folder.
    Coordinates whose likelihood is below 0.9 are replaced by NaN.

    eid: session id
    video_type: camera name used in the file names, e.g. 'left'

    Returns (Times, XYs): the camera 'times' entry and a dict mapping each
    point name to ``np.array([x, y])``.
    """
    one = ONE()
    dataset_types = ['camera.dlc', 'camera.times']

    one.load(eid, dataset_types=dataset_types)  # clobber=True # force download
    alf_path = one.path_from_eid(eid) / 'alf'

    cam0 = alf.io.load_object(alf_path,
                              '%sCamera' % video_type,
                              namespace='ibl')
    Times = cam0['times']
    cam = cam0['dlc']

    # column names look like '<point>_x', '<point>_y', '<point>_likelihood';
    # strip the last '_'-separated token to get the point names
    points = np.unique(['_'.join(x.split('_')[:-1]) for x in cam.keys()])

    # Set values to nan if likelihood is too low
    # for pqt: .to_numpy()
    XYs = {}
    for point in points:
        low_likelihood = cam[point + '_likelihood'] < 0.9
        x = np.ma.masked_where(low_likelihood,
                               cam[point + '_x']).filled(np.nan)
        y = np.ma.masked_where(low_likelihood,
                               cam[point + '_y']).filled(np.nan)
        XYs[point] = np.array([x, y])

    return Times, XYs
try: _ = alf.io.load_object(session_path.joinpath('alf'), 'probes') except FileNotFoundError: print(session_path.joinpath('alf')) print("no probes") continue probe_path = session_path.joinpath('alf', probe) try: metrics = alf.io.load_object(probe_path, object='clusters.metrics') except FileNotFoundError: print(probe_path) print("one probe missing") continue labs.append(one.list(eid, 'labs')) metric_list.append(metrics.metrics) ss.append(s) def split(d, l): dizzle = {} for data, label, in zip(d, l): if label not in dizzle: dizzle[label] = [data] else: dizzle[label].append(data) return list(dizzle.values()) for i, (metric, metric_name) in enumerate(metric_funcs):
# Download every dataset of one session and generate its figures.
#
# The 'ibllib - brainbox', 'iblscripts - certification' and
# 'analysis - cert_master_fn' repositories (on those branches) must be
# importable. The local checkouts are appended to the python path BEFORE the
# project imports below, and '~' is expanded explicitly because Python does
# not expand it in sys.path entries.
import os
import sys

sys.path.append(os.path.expanduser('~/Documents/code/ibllib'))
sys.path.append(os.path.expanduser('~/Documents/code/iblscripts'))

from oneibl.one import ONE
from plot import gen_figures

one = ONE()
# first session matching subject / date / session number
eid = one.search(subject='ZM_2104', date='2019-09-19', number=1)[0]
# fetch the files only (no loading into memory), keeping existing local copies
one.load(eid, dataset_types=one.list(), clobber=False, download_only=True)

gen_figures(eid, probe='probe_right', cluster_ids_summary=1)
"probe00", "probe00", "probe00", "probe00", "probe01", "probe01", "probe00", "probe01", "probe00", "probe01", "probe00", "probe01", "probe00", "probe00", "probe00", "probe00", "probe00", "probe00", "probe00" ] one = ONE() assert len(eids) == len(probes) metrics = {} for _, metric_name in metric_funcs: metrics[metric_name] = [] for i, (eid, probe) in enumerate(zip(eids, probes)): print(eid) if eid in bad_eids: continue print("{} from {}".format(i, len(eids))) print(one.list(eid, 'subjects')) coords = one.load(eid, dataset_types=['probes.trajectory']) for c in coords[0]: if c['label'] == probe: print("{}, x: {}, y: {}, z: {}".format(c['label'], c['x'], c['y'], c['z'])) continue spikes, _ = load_spike_sorting(eid, one=one) spikes = spikes[0] if spikes[probe]['times'] is None: print('empty times skip') continue fr = calc_fr(spikes[probe]['times'], spikes[probe]['clusters'])
# Exploratory script: download the raw sync and task datasets of one ephys
# session and select the channel map used for its sync pulses.
import matplotlib.pyplot as plt
import ibllib.io.extractors
from oneibl.one import ONE
import alf.io
import ibllib.plots as iblplots

one = ONE()
# take the first session matching subject / date / session number
eid = one.search(subject='KS005', date_range='2019-08-30', number=1)[0]
# eid = one.search(subject='CSHL_020', date_range='2019-12-04', number=5)[0]
# NOTE(review): the results of the next two expressions are discarded, so
# they only show something when run interactively (e.g. cell by cell).
one.alyx.rest('sessions', 'read', id=eid)['task_protocol']
one.list(eid)
# dataset types to fetch: sync channels/polarities/times plus raw task data
dtypes = [
    '_spikeglx_sync.channels',
    '_spikeglx_sync.polarities',
    '_spikeglx_sync.times',
    '_iblrig_taskSettings.raw',
    '_iblrig_taskData.raw',
    '_iblrig_encoderEvents.raw',
    '_iblrig_encoderPositions.raw',
    '_iblrig_encoderTrialInfo.raw',
]

# download_only=True: only the files are fetched; `files` is indexed below
# to recover the session folder from the first returned entry
files = one.load(eid, dataset_types=dtypes, download_only=True)
sess_path = alf.io.get_session_path(files[0])

# TODO here we will have to deal with 3A versions by looking up the master probe
chmap = ibllib.io.extractors.ephys_fpga.CHMAPS['3B']['nidq']
# chmap = ibllib.io.extractors.ephys_fpga.CHMAPS['3A']['ap']

"""get the sync pulses"""
need_load = bool(0) probe = 'probe_00' # must set probe name if need_load: # This is only useful if you do not have the data accessible on the machine # -- GET EID FOR SPECIFIC SESSION # Check that each dataset type needed is present dataset_types = ['ephysData.raw.ap', 'spikes.times', 'clusters.depths'] eid, sinfo = one.search(datasets=dataset_types, subjects='ZM_2407', date_range='2019-11-05', details=True) assert (len(eid) > 0), 'No EID found with those search terms' dtypes_session = one.list(eid)[0] if not set(dataset_types).issubset(set(dtypes_session)): missing_dtypes = [ dt for dt in dataset_types if dt not in dtypes_session ] raise ValueError('Missing datasets: ' + ','.join(missing_dtypes)) # In case the above did not run, # you can find all sessions containing raw data by looking into sinfos: # eids, sinfos = one.search( # datasets=['ephysData.raw.ap', 'spikes.times'], task_protocol='certification', details=True) # Set important directories from `eid` spikes_path = one.load(eid, dataset_types='spikes.amps', clobber=False,
def stream_save_labeled_frames(eid, video_type):
    '''
    For a given eid and camera type, stream sample frames,
    print DLC labels on them and save

    eid: session id, e.g. '5522ac4b-0e41-4c53-836a-aaa17e82b9eb'
    video_type: one of 'left', 'right', 'body'

    Five randomly chosen frames are streamed from the raw video, annotated
    with session info, time stamp, a colour legend and one coloured square
    per DLC point (likelihood >= 0.9 only), and written as
    '<save_images_folder><eid>_frame_<frame index>.png'.
    Raises AssertionError if the session does not list all camera time,
    raw video and DLC datasets.
    '''
    startTime = time.time()

    n_frames = 5  # sample 5 random frames

    save_images_folder = '/home/mic/DLC_QC/example_frames/'

    one = ONE()
    # label built from path components 5, 7 and 8 of the local session path
    # presumably lab / subject / date -- depends on the local folder layout
    info = '_'.join(
        np.array(str(one.path_from_eid(eid)).split('/'))[[5, 7, 8]])
    print(info, video_type)

    r = one.list(eid, 'dataset_types')

    dtypes_DLC = [
        '_ibl_rightCamera.times.npy', '_ibl_leftCamera.times.npy',
        '_ibl_bodyCamera.times.npy', '_iblrig_leftCamera.raw.mp4',
        '_iblrig_rightCamera.raw.mp4', '_iblrig_bodyCamera.raw.mp4',
        '_ibl_leftCamera.dlc.pqt', '_ibl_rightCamera.dlc.pqt',
        '_ibl_bodyCamera.dlc.pqt'
    ]

    dtype_names = [x['name'] for x in r]

    assert all([i in dtype_names
                for i in dtypes_DLC]), 'For this eid, not all data available'

    D = one.load(eid,
                 dataset_types=['camera.times', 'camera.dlc'],
                 dclass_output=True)
    alf_path = Path(D.local_path[0]).parent.parent / 'alf'

    cam0 = alf.io.load_object(alf_path,
                              '%sCamera' % video_type,
                              namespace='ibl')

    Times = cam0['times']
    cam = cam0['dlc']
    # '<point>_x' / '<point>_y' / '<point>_likelihood' -> '<point>'
    points = np.unique(['_'.join(x.split('_')[:-1]) for x in cam.keys()])

    # Set values to nan if likelihood is too low
    XYs = {}
    for point in points:
        low_likelihood = cam[point + '_likelihood'] < 0.9
        x = np.ma.masked_where(low_likelihood,
                               cam[point + '_x']).filled(np.nan)
        y = np.ma.masked_where(low_likelihood,
                               cam[point + '_y']).filled(np.nan)
        XYs[point] = np.array([x, y])

    if video_type != 'body':
        # tube points are not painted for the side cameras; filtering
        # (instead of list.remove) also tolerates files without them
        points = np.array(
            [p for p in points if p not in ('tube_top', 'tube_bottom')])

    # stream frames
    recs = [x for x in r
            if f'{video_type}Camera.raw.mp4' in x['name']][0]['file_records']
    video_path = [x['data_url'] for x in recs
                  if x['data_url'] is not None][0]
    vid_meta = get_video_meta(video_path)

    frame_idx = sample(range(vid_meta['length']), n_frames)
    print('frame indices:', frame_idx)
    frames = get_video_frames_preload(video_path,
                                      frame_idx,
                                      mask=np.s_[:, :, 0])
    size = [vid_meta['width'], vid_meta['height']]

    # crop window: the full frame
    x0 = 0
    x1 = size[0]
    y0 = 0
    y1 = size[1]
    if video_type == 'left':
        dot_s = 10  # [px] for painting DLC dots
    else:
        dot_s = 5

    # writing stuff on frames
    font = cv2.FONT_HERSHEY_SIMPLEX

    if video_type == 'left':
        bottomLeftCornerOfText = (20, 1000)
        fontScale = 4
    else:
        bottomLeftCornerOfText = (10, 500)
        fontScale = 2

    lineType = 2

    # assign a color to each DLC point
    cmap = matplotlib.cm.get_cmap('Set1')
    CR = np.arange(len(points)) / len(points)

    block = np.ones((2 * dot_s, 2 * dot_s, 3))

    a, b = bottomLeftCornerOfText
    # position of the time stamp text, to the right of the session info
    bottomLeftCornerOfText0 = (int(a * 10 + b / 2), b)
    fontScale2 = fontScale / 4

    for k, frame in enumerate(frames):
        idx = frame_idx[k]
        gray = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)

        # print session info
        fontColor = (255, 255, 255)
        cv2.putText(gray, info, bottomLeftCornerOfText, font, fontScale / 4,
                    fontColor, lineType)

        # print time
        Time = round(Times[idx], 3)
        cv2.putText(gray, '  time: ' + str(Time), bottomLeftCornerOfText0,
                    font, fontScale / 2, fontColor, lineType)

        # print DLC dots
        for ll, point in enumerate(points):
            col = (np.array([cmap(CR[ll])]) * 255)[0][:3]

            # Put point color legend
            if video_type == 'right':
                bottomLeftCornerOfText2 = (a, a * 2 * (1 + ll))
            else:
                bottomLeftCornerOfText2 = (b, a * 2 * (1 + ll))
            cv2.putText(gray, point, bottomLeftCornerOfText2, font,
                        fontScale2, col, lineType)

            # image arrays are indexed [row, column], i.e. [y, x]
            X = XYs[point][1][idx]
            Y = XYs[point][0][idx]

            if not np.isnan(X) and not np.isnan(Y):
                try:
                    X = X.astype(int)
                    Y = Y.astype(int)
                    gray[X - dot_s:X + dot_s,
                         Y - dot_s:Y + dot_s] = block * col
                except Exception as e:
                    # e.g. a point so close to the border that the square
                    # does not fit; report it and keep the frame
                    print('frame', idx)
                    print(e)

        gray = gray[y0:y1, x0:x1]
        cv2.imwrite(f'{save_images_folder}{eid}_frame_{idx}.png', gray)

    print(f'{n_frames} frames done in', np.round(time.time() - startTime))
## Init from oneibl.one import ONE one = ONE() # need to instantiate the class to have the API. ## Info about a session eid = '86e27228-8708-48d8-96ed-9aa61ab951db' list_of_datasets = one.list(eid) ## More Info about a session d = one.session_data_info(eid) print(d) ## Load #1 dataset_types = [ 'clusters.templateWaveforms', 'clusters.probes', 'clusters.depths' ] eid = '86e27228-8708-48d8-96ed-9aa61ab951db' wf, pr, d = one.load(eid, dataset_types=dataset_types) ## Load #2 my_data = one.load(eid, dataset_types=dataset_types, dclass_output=True) from ibllib.misc import pprint pprint(my_data.local_path) pprint(my_data.dataset_type) ## Load everything eid, ses_info = one.search(subject='flowers') my_data = one.load(eid[0]) pprint(my_data.dataset_type) ## Load
# Plot the weight curve of subject IBL_14 and the psychometric curve of its
# session(s) from 2018-11-27.
import matplotlib.pyplot as plt
import pandas as pd

from oneibl.one import ONE
from ibllib.time import isostr2date
# import sys
# sys.path.extend('/home/owinter/PycharmProjects/WGs/BehaviourAnaysis/python')
from load_mouse_data import get_behavior
from behavior_plots import plot_psychometric

one = ONE()

# https://alyx.internationalbrainlab.org/admin/actions/session/e752b02d-b54d-4373-b51e-0b31be5f8ee5/change/
# first get the subject information
subject_details = one.alyx.rest('subjects', 'read', 'IBL_14')

# plot the weight curve
# https://alyx.internationalbrainlab.org/admin-actions/water-history/37c8f897-cbcc-4743-bad6-764ccbbfb190
wei = pd.DataFrame(subject_details['weighings'])
# Series.apply returns a new Series; assign it back so the column really
# holds converted dates instead of the raw ISO strings
wei['date_time'] = wei['date_time'].apply(isostr2date)
wei.sort_values('date_time', inplace=True)
plt.plot(wei.date_time, wei.weight)

# now let's get some session information
ses_ids = one.search(subjects='IBL_14', date_range='2018-11-27')
print(one.list(ses_ids[0]))

df = get_behavior('IBL_14', date_range='2018-11-27')
plt.figure()
plot_psychometric(df, ax=plt.axes(), color="orange")
def Viewer(eid, video_type, trial_range, save_video=True, eye_zoom=False):
    '''
    Show a session video for a range of trials with wheel angle, camera time
    and the DLC-tracked points painted on each frame; optionally save it.

    eid: session id, e.g. '3663d82b-f197-4e8b-b299-7b803a155b84'
    video_type: one of 'left', 'right', 'body'
    trial_range: first and last trial number of range to be shown, e.g. [5,7]
    save_video: video is displayed and saved in local folder
    eye_zoom: if True, frames are cropped to a 66 x 66 px window placed
        around the mean position of the 'pupil_top_r' point

    Raises AssertionError if the session does not list all required dataset
    types or if the video has more frames than there are camera time stamps.

    Example usage to view and save labeled video with wheel angle:
    Viewer('3663d82b-f197-4e8b-b299-7b803a155b84', 'left', [5,7])
    3D example: 'cb2ad999-a6cb-42ff-bf71-1774c57e5308', [5,7]
    '''

    save_vids_here = '/home/mic/'
    if save_vids_here[-1] != '/':
        return 'Last character of save_vids_here must be slash'

    one = ONE()
    dataset_types = [
        'camera.times', 'wheel.position', 'wheel.timestamps',
        'trials.intervals', 'camera.dlc'
    ]

    a = one.list(eid, 'dataset-types')

    assert all([i in a for i in dataset_types
                ]), 'For this eid, not all data available'

    D = one.load(eid, dataset_types=dataset_types, dclass_output=True)
    alf_path = Path(D.local_path[0]).parent.parent / 'alf'

    # Download a single video
    video_data = alf_path.parent / 'raw_video_data'
    download_raw_video(eid, cameras=[video_type])
    video_path = list(
        video_data.rglob('_iblrig_%sCamera.raw.*' % video_type))[0]
    print(video_path)

    # that gives cam time stamps and DLC output (change to alf_path eventually)
    cam = alf.io.load_object(alf_path,
                             '%sCamera' % video_type,
                             namespace='ibl')

    # just to read in times for newer data (which has DLC results in pqt format
    # cam = alf.io.load_object(alf_path, '_ibl_%sCamera' % video_type)

    # set where to read and save video and get video info
    cap = cv2.VideoCapture(video_path.as_uri())
    out = None
    # everything below runs under try/finally so that the capture, the
    # writer and the display window are released even if a step fails
    try:
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        size = (int(cap.get(3)), int(cap.get(4)))  # (width, height)

        # equal counts are fine; only more frames than stamps is an error
        assert length <= len(cam['times']), '#frames > #stamps'
        print(eid, ', ', video_type, ', fsp:', fps, ', #frames:', length,
              ', #stamps:', len(cam['times']), ', #frames - #stamps = ',
              length - len(cam['times']))

        # pick trial range for which to display stuff
        trials = alf.io.load_object(alf_path, 'trials', namespace='ibl')
        num_trials = len(trials['intervals'])
        if trial_range[-1] > num_trials - 1:
            print('There are only %s trials' % num_trials)

        # camera stamps closest to start of first / end of last trial
        frame_start = find_nearest(cam['times'],
                                   [trials['intervals'][trial_range[0]][0]])
        frame_stop = find_nearest(cam['times'],
                                  [trials['intervals'][trial_range[-1]][1]])

        '''
        wheel related stuff
        '''

        wheel = alf.io.load_object(alf_path, 'wheel', namespace='ibl')
        import brainbox.behavior.wheel as wh
        try:
            pos, t = wh.interpolate_position(wheel['timestamps'],
                                             wheel['position'],
                                             freq=1000)
        except Exception:
            # some sessions store the wheel time stamps under 'times'
            pos, t = wh.interpolate_position(wheel['times'],
                                             wheel['position'],
                                             freq=1000)

        w_start = find_nearest(t, trials['intervals'][trial_range[0]][0])
        w_stop = find_nearest(t, trials['intervals'][trial_range[-1]][1])

        # confine to interval
        pos_int = pos[w_start:w_stop]
        t_int = t[w_start:w_stop]

        # alignment of cam stamps and interpolated wheel stamps
        wheel_pos = []
        for kk, wt in enumerate(cam['times'][frame_start:frame_stop], 1):
            wheel_pos.append(pos_int[find_nearest(t_int, wt)])
            if kk % 3000 == 0:
                print('iteration', kk)

        '''
        DLC related stuff
        '''
        Times = cam['times'][frame_start:frame_stop]
        del cam['times']

        # some exception for inconsistent data formats
        try:
            dlc_name = '_ibl_%sCamera.dlc.pqt' % video_type
            dlc_path = alf_path / dlc_name
            cam = pd.read_parquet(dlc_path, engine="fastparquet")
            print('it is pqt')
        except Exception:
            # no readable pqt file: fall back to the object stored next to
            # the raw video
            raw_vid_path = alf_path.parent / 'raw_video_data'
            cam = alf.io.load_object(raw_vid_path,
                                     '%sCamera' % video_type,
                                     namespace='ibl')

        points = np.unique(['_'.join(x.split('_')[:-1]) for x in cam.keys()])
        if len(points) == 1:
            # a single name means the DLC table is nested under 'dlc'
            cam = cam['dlc']
            points = np.unique(
                ['_'.join(x.split('_')[:-1]) for x in cam.keys()])

        if video_type != 'body':
            d = list(points)
            d.remove('tube_top')
            d.remove('tube_bottom')
            points = np.array(d)

        # Set values to nan if likelihood is too low
        # for pqt: .to_numpy()
        XYs = {}
        for point in points:
            low_likelihood = cam[point + '_likelihood'] < 0.9
            x = np.ma.masked_where(low_likelihood,
                                   cam[point + '_x']).filled(np.nan)
            y = np.ma.masked_where(low_likelihood,
                                   cam[point + '_y']).filled(np.nan)
            XYs[point] = np.array(
                [x[frame_start:frame_stop], y[frame_start:frame_stop]])

        # Just for 3D testing
        # return XYs

        # Zoom at eye
        if eye_zoom:
            pivot = np.nanmean(XYs['pupil_top_r'], axis=1)
            x0 = int(pivot[0]) - 33
            x1 = int(pivot[0]) + 33
            y0 = int(pivot[1]) - 28
            y1 = int(pivot[1]) + 38
            size = (66, 66)
            dot_s = 1  # [px] for painting DLC dots
        else:
            x0 = 0
            x1 = size[0]
            y0 = 0
            y1 = size[1]
            if video_type == 'left':
                dot_s = 10  # [px] for painting DLC dots
            else:
                dot_s = 5

        if save_video:
            loc = save_vids_here + '%s_trials_%s_%s_%s.mp4' % (
                eid, trial_range[0], trial_range[-1], video_type)
            out = cv2.VideoWriter(loc, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                                  size)  # put , 0 if grey scale

        # writing stuff on frames
        font = cv2.FONT_HERSHEY_SIMPLEX

        if video_type == 'left':
            bottomLeftCornerOfText = (20, 1000)
            fontScale = 4
        else:
            bottomLeftCornerOfText = (10, 500)
            fontScale = 2

        lineType = 2

        # assign a color to each DLC point
        cmap = matplotlib.cm.get_cmap('Spectral')
        CR = np.arange(len(points)) / len(points)

        block = np.ones((2 * dot_s, 2 * dot_s, 3))

        a, b = bottomLeftCornerOfText
        # position of the time text, to the right of the wheel angle
        bottomLeftCornerOfText0 = (int(a * 10 + b / 2), b)
        fontScale2 = fontScale / 4

        # set start frame (property 1 is the frame position)
        cap.set(1, frame_start)

        k = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # no more frames could be read; stop instead of failing on
                # an empty frame
                break
            gray = frame

            # print wheel angle
            fontColor = (255, 255, 255)
            Angle = round(wheel_pos[k], 2)
            Time = round(Times[k], 3)
            cv2.putText(gray, 'Wheel angle: ' + str(Angle),
                        bottomLeftCornerOfText, font, fontScale / 2,
                        fontColor, lineType)

            cv2.putText(gray, '  time: ' + str(Time),
                        bottomLeftCornerOfText0, font, fontScale / 2,
                        fontColor, lineType)

            # print DLC dots
            for ll, point in enumerate(points):
                col = (np.array([cmap(CR[ll])]) * 255)[0][:3]

                # Put point color legend
                if video_type == 'right':
                    bottomLeftCornerOfText2 = (a, a * 2 * (1 + ll))
                else:
                    bottomLeftCornerOfText2 = (b, a * 2 * (1 + ll))
                cv2.putText(gray, point, bottomLeftCornerOfText2, font,
                            fontScale2, col, lineType)

                # image arrays are indexed [row, column], i.e. [y, x]
                X = XYs[point][1][k]
                Y = XYs[point][0][k]

                if not np.isnan(X) and not np.isnan(Y):
                    X = X.astype(int)
                    Y = Y.astype(int)
                    gray[X - dot_s:X + dot_s,
                         Y - dot_s:Y + dot_s] = block * col

            gray = gray[y0:y1, x0:x1]
            if save_video:
                out.write(gray)
            cv2.imshow('frame', gray)
            cv2.waitKey(1)
            k += 1
            # '>=' so that a range of one frame or less still terminates
            if k >= (frame_stop - frame_start) - 1:
                break
    finally:
        if out is not None:
            out.release()
        cap.release()
        cv2.destroyAllWindows()
def GetXYs(eid, video_type, trial_range):
    '''
    Load the DLC-tracked points of one camera for a range of trials.

    INPUT:
    eid: session id, e.g. '3663d82b-f197-4e8b-b299-7b803a155b84'
    video_type: one of 'left', 'right', 'body'
    trial_range: first and last trial number of range to be accessed,
        e.g. [5,7]

    OUTPUT:
    XYs: dictionary with DLC-tracked points as keys, x,y coordinates as
        entries, set to nan for low likelihood (< 0.9)
    Times: corresponding timestamps

    Raises AssertionError if the session does not list all required dataset
    types or if a DLC trace is longer than the camera time stamps.
    '''

    one = ONE()
    dataset_types = ['camera.times', 'trials.intervals', 'camera.dlc']

    a = one.list(eid, 'dataset-types')

    assert all([i in a for i in dataset_types
                ]), 'For this eid, not all data available'

    D = one.load(eid, dataset_types=dataset_types, dclass_output=True)
    alf_path = Path(D.local_path[0]).parent.parent / 'alf'

    cam = alf.io.load_object(alf_path,
                             '%sCamera' % video_type,
                             namespace='ibl')

    # pick trial range
    trials = alf.io.load_object(alf_path, 'trials', namespace='ibl')
    num_trials = len(trials['intervals'])
    if trial_range[-1] > num_trials - 1:
        print('There are only %s trials' % num_trials)

    # camera stamps closest to start of first / end of last requested trial
    frame_start = find_nearest(cam['times'],
                               [trials['intervals'][trial_range[0]][0]])
    frame_stop = find_nearest(cam['times'],
                              [trials['intervals'][trial_range[-1]][1]])

    last_time_stamp = trials['intervals'][-1][-1]
    last_stamp_idx = find_nearest(cam['times'], last_time_stamp)

    print('Last trial ends at time %s, which is stamp index %s' %
          (last_time_stamp, last_stamp_idx))

    Times = cam['times'][frame_start:frame_stop]
    n_stamps = len(cam['times'])

    # The DLC columns are either nested under 'dlc' or stored directly on
    # the camera object next to 'times'; support both layouts.
    dlc = cam['dlc'] if 'dlc' in cam.keys() else cam

    # '<point>_x' / '<point>_y' / '<point>_likelihood' -> '<point>'.
    # Keys without an underscore (such as 'times') give an empty name and
    # are not points, so they are dropped.
    names = ['_'.join(x.split('_')[:-1]) for x in dlc.keys()]
    points = np.unique([name for name in names if name])

    if video_type != 'body':
        # tube points are not returned for the side cameras; filtering
        # (instead of list.remove) also tolerates files without them
        points = np.array(
            [p for p in points if p not in ('tube_top', 'tube_bottom')])

    # Set values to nan if likelihood is too low
    # for pqt: .to_numpy()
    XYs = {}
    for point in points:
        print(point, len(dlc[point + '_x']), n_stamps)
        assert len(dlc[point + '_x']) <= n_stamps, 'dlc > n_stamps'
        low_likelihood = dlc[point + '_likelihood'] < 0.9
        x = np.ma.masked_where(low_likelihood,
                               dlc[point + '_x']).filled(np.nan)
        y = np.ma.masked_where(low_likelihood,
                               dlc[point + '_y']).filled(np.nan)
        XYs[point] = np.array(
            [x[frame_start:frame_stop], y[frame_start:frame_stop]])

    return XYs, Times
## Init from oneibl.one import ONE from ibllib.misc import pprint one = ONE(base_url='https://test.alyx.internationalbrainlab.org', username='******', password='******') ## Find an experiment eid = one.search(users='olivier', date_range=['2018-08-24', '2018-08-24']) pprint(eid) one.search_terms() ## List dataset types for a session eid = 'cf264653-2deb-44cb-aa84-89b82507028a' one.list(eid) ## More Info about a session d = one.list(eid, 'All') ## Get More Info about datasets d = one.list(eid, details=True) print(d) print(d) ## List #1 one.list(None, 'dataset-types') one.list(None, 'users') one.list(None, 'subjects') ## Load #1 dataset_types = [ 'clusters.templateWaveforms', 'clusters.probes', 'clusters.depths'
class Alyx2NWBMetadata: def __init__(self, eid=None, one_obj=None, **one_search_kwargs): """ Query the sessions, subject, lab tables of the Alyx database using the ONE api. Retrieve a mice experiment's metadata as well as the various data types (ONE format) created during the experiment like: Trials, Behavior, Electrophysiology(raw, spike sorted), Stimulus, Probes used. Parameters ---------- eid: str uuid of IBL experiment one_obj: ONE() one object created after user authenticated connection to ALyx servers one_search_kwargs: dict various search terms to retrieve an eid of interest using the ONE api to query Alyx. """ if one_obj is None: self.one_obj = ONE() elif not isinstance(one_obj, OneAbstract): raise Exception('one_obj is not of ONE class') else: self.one_obj = one_obj if eid is None: eid = self.one_obj.search(**one_search_kwargs) if len(eid) > 1: print(f'nos of EIDs found for your search query: {len(eid)}, ' f'generating metadata from the first') if input('continue? y/n') == 'y': pass else: exit() self.eid = eid[0] elif isinstance(eid, list): self.eid = eid[0] else: self.eid = eid self.one_search_kwargs = one_search_kwargs self.schema = nwb_schema self.dataset_description_list = self._get_dataset_details() self.eid_session_info = self._retrieve_eid_endpoint() self.dataset_type_list = self._list_eid_metadata('dataset_type') self.users_list = self._list_eid_metadata('users') self.subjects_list = self._list_eid_metadata('subjects') self.labs_list = self._list_eid_metadata('labs') self.dataset_details, self.dataset_simple = self._dataset_type_parse() self._get_lab_table() self._get_subject_table() def _get_datetime(self, dtstr, format='%Y-%m-%dT%X'): if '.' 
in dtstr: dtstr = dtstr.split('.')[0] if len(dtstr) > 19: dtstr = dtstr[:19] if len(dtstr) == 10: format = '%Y-%m-%d' elif len(dtstr) == 19: if 'T' in dtstr: format = '%Y-%m-%dT%X' else: format = '%Y-%m-%d %X' try: return datetime.strptime(dtstr, format) except: raise Exception('could not convert to datetime') def _get_dataset_details(self): """ Retrieves all datasets in the alyx database currently. Retrieves a list of dicts with keys like id, name, created_by,description etc. Uses only name and description. Returns ------- list List of dicts: {<dataset-name> : <dataset_description> """ data_url_resp = self.one_obj.alyx.rest('dataset-types', 'list') return {i['name']: i['description'] for i in data_url_resp} def _list_eid_metadata(self, list_type): """ Uses one's list method to get the types of <list_type> data from the given eid. Parameters ---------- list_type: str one of strings from >>> ONE().search_terms() Returns ------- list """ return self.one_obj.list(self.eid, list_type) def _retrieve_eid_endpoint(self): """ To get the current sessions url response. Contains all the session metadata as well as the current datasets etc. Returns ------- list list of server responses. """ return self.one_obj.alyx.rest('sessions/' + self.eid, 'list') def _get_lab_table(self): self.lab_table = self.one_obj.alyx.rest('labs', 'list') def _get_subject_table(self): self.subject_table = self.one_obj.alyx.rest( 'subjects/' + self.eid_session_info['subject'], 'list') def _dataset_type_parse(self): """ Returns ------- list list of dicts: {<dataset object name>: (eg. spikes, clusters etc) [ {name: objects attribute type (eg. times, intervals etc description: attributes description} {name: objects attribute type (eg. 
times, intervals etc description: attributes description} ] } """ split_list_objects = [i.split('.')[0] for i in self.dataset_type_list] split_list_attributes = [ '.'.join(i.split('.')[1:]) for i in self.dataset_type_list ] dataset_description = [ self.dataset_description_list[i] for i in self.dataset_type_list ] split_list_objects_dict_details = dict() split_list_objects_dict = dict() for obj in set(split_list_objects): split_list_objects_dict_details[obj] = [] split_list_objects_dict[obj] = [] for att_idx, attrs in enumerate(split_list_attributes): append_dict = { 'name': attrs, 'description': dataset_description[att_idx] } # 'extension': dataset_extension[att_idx] } split_list_objects_dict_details[ split_list_objects[att_idx]].extend([append_dict]) split_list_objects_dict[split_list_objects[att_idx]].extend( [attrs]) dataset_type_list = split_list_objects_dict_details dataset_type_list_simple = split_list_objects_dict return dataset_type_list, dataset_type_list_simple @staticmethod def _unpack_dataset_details(dataset_details, object_name, custom_attrs=None, match_str=' '): """ Unpacks the dataset_details into: Parameters ---------- dataset_details: dict self.dataset_details object_name: str eg: spikes, clusters, Ecephys custom_attrs: list attrs to unpack match_str: regex match string: attrs to exclude (like .times/.intervals etc) Returns ------- datafiles: str ex: 'face.motionEnergy' datanames: str ex: 'motionEnergy' datadesc: str ex: <description string for motionEnergy> """ cond = lambda x: re.match(match_str, x) datafiles_all = [ object_name + '.' 
+ ii['name'] for ii in dataset_details[object_name] if not cond(ii['name']) ] datafiles_names_all = [ ii['name'] for ii in dataset_details[object_name] if not cond(ii['name']) ] datafiles_desc_all = [ ii['description'] for ii in dataset_details[object_name] if not cond(ii['name']) ] if custom_attrs: datafiles_inc = [] datafiles_names_inc = [] datafiles_desc_inc = [] for attrs in custom_attrs: datafiles_inc.extend([ i for i in datafiles_all if i in object_name + '.' + attrs ]) datafiles_names_inc.extend([ datafiles_names_all[j] for j, i in enumerate(datafiles_all) if i in object_name + '.' + attrs ]) datafiles_desc_inc.extend([ datafiles_desc_all[j] for j, i in enumerate(datafiles_all) if i in object_name + '.' + attrs ]) else: datafiles_inc = datafiles_all datafiles_names_inc = datafiles_names_all datafiles_desc_inc = datafiles_desc_all return datafiles_inc, datafiles_names_inc, datafiles_desc_inc def _initialize_container_dict(self, name=None, default_value=None): if default_value is None: default_value = dict() if name: return dict({name: default_value.copy()}) else: return None def _get_all_object_names(self): return sorted( list(set([i.split('.')[0] for i in self.dataset_type_list]))) def _get_current_object_names(self, obj_list): loop_list = [] for j, k in enumerate(obj_list): loop_list.extend( [i for i in self._get_all_object_names() if k == i]) return loop_list def _get_timeseries_object(self, dataset_details, object_name, ts_name, custom_attrs=None, drop_attrs=None, **kwargs): """ Parameters ---------- dataset_details: dict self.dataset_details object_name: str name of hte object_name in the IBL datatype ts_name: str the key name for the timeseries list custom_attrs: list Attributes to consider drop_attrs: list Attributes to drop kwargs additional keys/values to add to the default timeseries. 
For derivatives of TimeSEries Returns ------- dict() { "time_series": [ { "name": "face_motionEnergy", "data": "face.motionEnergy", "timestamps": "face.timestamps", "description": "Features extracted from the video of the frontal aspect of the subject, including the subject\\'s face and forearms." }, { "name": "_ibl_lickPiezo_times", "data": "_ibl_lickPiezo.raw", "timestamps": "_ibl_lickPiezo.timestamps", "description": "Voltage values from a thin-film piezo connected to the lick spout, so that values are proportional to deflection of the spout and licks can be detected as peaks of the signal." } ] } """ matchstr = r'.*time.*|.*interval.*' timeattr_name = [ i['name'] for i in dataset_details[object_name] if re.match(matchstr, i['name']) ] dataset_details[object_name], _ = self._drop_attrs( dataset_details[object_name].copy(), drop_attrs) datafiles, datafiles_names, datafiles_desc = \ self._unpack_dataset_details(dataset_details.copy(), object_name, custom_attrs, match_str=matchstr) if timeattr_name: datafiles_timedata, datafiles_time_name, datafiles_time_desc = \ self._unpack_dataset_details(dataset_details.copy(), object_name, timeattr_name) elif not kwargs: # if no timestamps info, then let this fields be data return {ts_name: []} else: datafiles_timedata, datafiles_time_name, datafiles_time_desc = \ datafiles, datafiles_names, datafiles_desc if not datafiles: if not kwargs: return {ts_name: []} # datafiles_names = datafiles_time_name # datafiles_desc = datafiles_time_desc # datafiles = ['None'] timeseries_dict = {ts_name: [None] * len(datafiles)} for i, j in enumerate(datafiles): original = { 'name': datafiles_names[i], 'description': datafiles_desc[i], 'timestamps': datafiles_timedata[0], 'data': datafiles[i] } original.update(**kwargs) timeseries_dict[ts_name][i] = { k: v for k, v in original.items() if v is not None } return timeseries_dict @staticmethod def _attrnames_align(attrs_dict, custom_names): """ the attributes that receive the custom names are 
reordered to be first in the list Parameters. This assigns description:'no_description' to those that are not found. This will later be used(nwb_converter) as an identifier for non-existent data for the given eid. ---------- attrs_dict:list list of dict(attr_name:'',attr_description:'') custom_names same as 'default_colnames_dict' in self._get_dynamictable_object Returns ------- dict() """ attrs_list = [i['name'] for i in attrs_dict] list_id_func_exclude = \ lambda val, comp_list, comp_bool: [i for i, j in enumerate(comp_list) if comp_bool & (j == val)] cleanup = lambda x: [i[0] for i in x if i] if custom_names: custom_names_list = [i for i in list(custom_names.values())] custom_names_dict = [] for i in range(len(custom_names_list)): custom_names_dict.extend([{ 'name': custom_names_list[i], 'description': 'no_description' }]) attr_list_include_idx = cleanup([ list_id_func_exclude(i, attrs_list, True) for i in custom_names_list ]) attr_list_exclude_idx = set(range(len(attrs_list))).difference( set(attr_list_include_idx)) custom_names_list_include_idx = [ i for i, j in enumerate(custom_names_list) if list_id_func_exclude(j, attrs_list, True) ] for ii, jj in enumerate(custom_names_list_include_idx): custom_names_dict[custom_names_list_include_idx[ ii]] = attrs_dict[attr_list_include_idx[ii]] custom_names_list[custom_names_list_include_idx[ ii]] = attrs_list[attr_list_include_idx[ii]] extend_dict = [attrs_dict[i] for i in attr_list_exclude_idx] extend_list = [attrs_list[i] for i in attr_list_exclude_idx] custom_names_dict.extend(extend_dict) custom_names_list.extend(extend_list) return custom_names_dict, custom_names_list else: out_dict = attrs_dict out_list = attrs_list return out_dict, out_list @staticmethod def _drop_attrs(dataset_details, drop_attrs, default_colnames_dict=None): """ Used to remove given attributes of the IBL dataset. 
Parameters ---------- dataset_details: list self.dataset_details['clusters'] [ { 'name': 'amps', 'description': description }, { 'name': 'channels', 'description': description } ] drop_attrs: list list of str: attribute names to drop of the self.dataset_details dict default_colnames_dict Returns ------- dataset_details: list list without dictionaries with 'name' as in drop_attrs """ attrs_list = [i['name'] for i in dataset_details] if default_colnames_dict is not None: default_colnames_dict_copy = default_colnames_dict.copy() for i, j in default_colnames_dict.items(): if j not in attrs_list: default_colnames_dict_copy.pop(i) else: default_colnames_dict_copy = default_colnames_dict if drop_attrs is None: return dataset_details, default_colnames_dict_copy elif default_colnames_dict is not None: for i, j in default_colnames_dict.items(): if j in drop_attrs and j in attrs_list: default_colnames_dict_copy.pop(i) dataset_details_return = [ dataset_details[i] for i, j in enumerate(attrs_list) if j not in drop_attrs ] return dataset_details_return, default_colnames_dict_copy @staticmethod def _get_dynamictable_array(**kwargs): """ Helper to dynamictable object method Parameters ---------- kwargs keys and values that define the dictionary, both keys and values are lists where each index would slice all the keys/values and create a dict out of that Returns ------- list list of dictionaries each with the keys and values from kwargs """ custom_keys = list(kwargs.keys()) custom_data = list(kwargs.values()) out_list = [None] * len(custom_data[0]) for ii, jj in enumerate(custom_data[0]): out_list[ii] = dict().copy() for i, j in enumerate(custom_keys): out_list[ii][j] = custom_data[i][ii] return out_list def _get_dynamictable_object(self, dataset_details, object_name, dt_name, default_colnames_dict=None, custom_attrs=None, drop_attrs=None): """ Parameters ---------- dataset_details self.dataset_details for each eid object_name:str object from the IBL data types from which to 
create this table. dt_name:str custom name for the dynamic table. Its the key with the value being dynamictable_array default_colnames_dict:dict() keys are the custom names of the columns, corresponding values are the attributes which have to be renamed. custom_attrs:list list of attributes for the given IBL object in object_name to be considered, all others are ignored Returns ------- outdict:dict() example output below: {'Trials': [ { "name": "column1 name", "data": "column data uri (string)", "description": "col1 description" }, { "name": "column2 name", "data": "column data uri (string)", "description": "col2 description" } ] } """ dataset_details[object_name], default_colnames_dict = self._drop_attrs( dataset_details[object_name].copy(), drop_attrs, default_colnames_dict) dataset_details[object_name], _ = self._attrnames_align( dataset_details[object_name].copy(), default_colnames_dict) if not default_colnames_dict: default_colnames = [] else: default_colnames = list(default_colnames_dict.keys()) custom_columns_datafilename, custom_columns_name, custom_columns_description = \ self._unpack_dataset_details(dataset_details.copy(), object_name, custom_attrs) custom_columns_name[:len(default_colnames)] = default_colnames in_list = self._get_dynamictable_array( name=custom_columns_name, data=custom_columns_datafilename, description=custom_columns_description) outdict = {dt_name: in_list} return outdict @property def eid_metadata(self): return dict(eid=self.eid) @property def probe_metadata(self): probes_metadata_dict = self._initialize_container_dict( 'Probes', default_value=[]) probe_list = self.eid_session_info['probe_insertion'] probe_dict_keys = ['id', 'model', 'name', 'trajectory_estimate'] input_dict = dict() for key in probe_dict_keys: if key == 'trajectory_estimate': input_dict.update({ key: [[json.dumps(l) for l in probe_list[i].get(key, ["None"])] for i in range(len(probe_list))] }) else: input_dict.update({ key: [ probe_list[i].get(key, "None") for i in 
range(len(probe_list)) ] }) probes_metadata_dict['Probes'].extend( self._get_dynamictable_array(**input_dict)) return probes_metadata_dict @property def nwbfile_metadata(self): nwbfile_metadata_dict = self._initialize_container_dict('NWBFile') nwbfile_metadata_dict['NWBFile'].update( session_start_time=self._get_datetime( self.eid_session_info['start_time']), keywords=[ ','.join(self.eid_session_info['users']), self.eid_session_info['lab'], 'IBL' ], experiment_description=self.eid_session_info['project'], session_id=self.eid, experimenter=self.eid_session_info['users'], identifier=self.eid, institution=[ i['institution'] for i in self.lab_table if i['name'] == [self.eid_session_info['lab']][0] ][0], lab=self.eid_session_info['lab'], protocol=self.eid_session_info['task_protocol'], surgery='none', notes=', '.join([ f"User:{i['user']}{i['text']}" for i in self.eid_session_info['notes'] ]), session_description=','.join(self.eid_session_info['procedures'])) return nwbfile_metadata_dict @property def sessions_metadata(self): sessions_metadata_dict = self._initialize_container_dict( 'IBLSessionsData') custom_fields = [ 'location', 'project', 'type', 'number', 'end_time', 'parent_session', 'url', 'qc' ] sessions_metadata_dict['IBLSessionsData'] = { i: str(self.eid_session_info[i]) if i not in ['procedures', 'number'] else self.eid_session_info[i] for i in custom_fields } sessions_metadata_dict['IBLSessionsData']['extended_qc'] = json.dumps( self.eid_session_info['extended_qc']) sessions_metadata_dict['IBLSessionsData']['json'] = json.dumps( self.eid_session_info['json']) sessions_metadata_dict['IBLSessionsData']['wateradmin_session_related'] = \ [json.dumps(i) for i in self.eid_session_info['wateradmin_session_related']]\ if len(self.eid_session_info['wateradmin_session_related']) > 0 else ['None'] sessions_metadata_dict['IBLSessionsData']['notes'] = \ [json.dumps(i) for i in self.eid_session_info['notes']]\ if len(self.eid_session_info['notes']) > 0 else ['None'] return 
sessions_metadata_dict @property def subject_metadata(self): subject_metadata_dict = self._initialize_container_dict('IBLSubject') sub_table_dict = deepcopy(self.subject_table) if sub_table_dict: subject_metadata_dict['IBLSubject'] = dict( age=f'P{sub_table_dict.pop("age_weeks")}W', subject_id=sub_table_dict.pop('id'), description=sub_table_dict.pop('description'), genotype=','.join(sub_table_dict.pop('genotype')), sex=sub_table_dict.pop('sex'), species=sub_table_dict.pop('species'), weight=str(sub_table_dict.pop('reference_weight')), date_of_birth=self._get_datetime( sub_table_dict.pop('birth_date')), **sub_table_dict) water_admin_data = [json.dumps(i) for i in subject_metadata_dict['IBLSubject']['water_administrations']]\ if len(subject_metadata_dict['IBLSubject']['water_administrations']) > 0 else ['None'] subject_metadata_dict['IBLSubject'].update( weighings=[ json.dumps(i) for i in subject_metadata_dict['IBLSubject']['weighings'] ], water_administrations=water_admin_data) temp_metadatadict = deepcopy(subject_metadata_dict['IBLSubject']) for key, val in temp_metadatadict.items(): if isinstance(val, list) and len(val) == 0: _ = subject_metadata_dict['IBLSubject'].pop(key) return subject_metadata_dict @property def surgery_metadata(self): # currently not exposed by api return dict() @property def behavior_metadata(self): behavior_metadata_dict = self._initialize_container_dict('Behavior') behavior_objects = [ 'wheel', 'wheelMoves', 'licks', 'lickPiezo', 'face', 'eye', 'camera' ] current_behavior_objects = self._get_current_object_names( behavior_objects) for object_name in current_behavior_objects: if 'wheel' == object_name: behavior_metadata_dict['Behavior']['BehavioralTimeSeries'] = \ self._get_timeseries_object(self.dataset_details.copy(), object_name, 'time_series') if 'wheelMoves' in object_name: behavior_metadata_dict['Behavior']['BehavioralEpochs'] = \ self._get_timeseries_object(self.dataset_details.copy(), object_name, 'time_intervals') if 'lickPiezo' in 
object_name: behavior_metadata_dict['Behavior']['BehavioralTimeSeries'][ 'time_series'].extend( self._get_timeseries_object( self.dataset_details.copy(), object_name, 'time_series')['time_series']) if 'licks' in object_name: behavior_metadata_dict['Behavior']['BehavioralEvents'] = \ self._get_timeseries_object(self.dataset_details.copy(), object_name, 'time_series') if 'face' in object_name: behavior_metadata_dict['Behavior']['BehavioralTimeSeries'][ 'time_series'].extend( self._get_timeseries_object( self.dataset_details.copy(), object_name, 'time_series')['time_series']) if 'eye' in object_name: behavior_metadata_dict['Behavior']['PupilTracking'] = \ self._get_timeseries_object(self.dataset_details.copy(), object_name, 'time_series') if 'camera' in object_name: behavior_metadata_dict['Behavior']['Position'] = \ self._get_timeseries_object(self.dataset_details.copy(), object_name, 'spatial_series', name='camera_dlc') if len(behavior_metadata_dict['Behavior']['Position']['spatial_series']) > 0 and \ behavior_metadata_dict['Behavior']['Position']['spatial_series'][0]['data']== \ behavior_metadata_dict['Behavior']['Position']['spatial_series'][0]['timestamps']: behavior_metadata_dict['Behavior']['Position'][ 'spatial_series'][0][ 'timestamps'] = '_iblrig_Camera.timestamps' return behavior_metadata_dict @property def trials_metadata(self): trials_metadata_dict = self._initialize_container_dict('Trials') trials_objects = ['trials'] current_trial_objects = self._get_current_object_names(trials_objects) for object_name in current_trial_objects: if 'trial' in object_name: trials_metadata_dict = self._get_dynamictable_object( self.dataset_details.copy(), 'trials', 'Trials', default_colnames_dict=dict(start_time='intervals', stop_time='intervals')) return trials_metadata_dict @property def stimulus_metadata(self): stimulus_objects = [ 'sparseNoise', 'passiveBeeps', 'passiveValveClick', 'passiveVisual', 'passiveWhiteNoise' ] stimulus_metadata_dict = 
self._initialize_container_dict('Stimulus') current_stimulus_objects = self._get_current_object_names( stimulus_objects) for object_name in current_stimulus_objects: if 'sparseNoise' in object_name: stimulus_metadata_dict['Stimulus'] = \ self._get_timeseries_object(self.dataset_details.copy(), object_name, 'time_series') if 'passiveBeeps' in object_name: stimulus_metadata_dict['Stimulus']['time_series'].extend( self._get_timeseries_object(self.dataset_details.copy(), object_name, 'time_series')['time_series']) if 'passiveValveClick' in object_name: stimulus_metadata_dict['Stimulus']['time_series'].extend( self._get_timeseries_object(self.dataset_details.copy(), object_name, 'time_series')['time_series']) if 'passiveVisual' in object_name: stimulus_metadata_dict['Stimulus']['time_series'].extend( self._get_timeseries_object(self.dataset_details.copy(), object_name, 'time_series')['time_series']) if 'passiveWhiteNoise' in object_name: stimulus_metadata_dict['Stimulus']['time_series'].extend( self._get_timeseries_object(self.dataset_details.copy(), object_name, 'time_series')['time_series']) return stimulus_metadata_dict @property def device_metadata(self): device_metadata_dict = self._initialize_container_dict( 'Device', default_value=[]) device_metadata_dict['Device'].extend( self._get_dynamictable_array(name=['NeuroPixels probe'], description=['NeuroPixels probe'])) return device_metadata_dict @property def units_metadata(self): units_objects = ['clusters', 'spikes'] metrics_columns = [ 'cluster_id', 'cluster_id.1', 'num_spikes', 'firing_rate', 'presence_ratio', 'presence_ratio_std', 'isi_viol', 'amplitude_cutoff', 'amplitude_std', 'epoch_name', 'ks2_contamination_pct', 'ks2_label' ] units_metadata_dict = self._initialize_container_dict( 'Units', default_value=list()) current_units_objects = self._get_current_object_names(units_objects) for object_name in current_units_objects: if 'clusters' in object_name: units_metadata_dict = \ 
self._get_dynamictable_object(self.dataset_details.copy(), 'clusters', 'Units', default_colnames_dict=dict(location='brainAcronyms', waveform_mean='waveforms', electrodes='channels', electrode_group='probes', ), drop_attrs=['uuids', 'metrics']) units_metadata_dict['Units'].extend( self._get_dynamictable_array( name=['obs_intervals', 'spike_times'], data=[ 'trials.intervals', 'spikes.clusters,spikes.times' ], description=[ 'time intervals of each cluster', 'spike times of cluster' ])) units_metadata_dict['Units'].extend( self._get_dynamictable_array( name=metrics_columns, data=['clusters.metrics'] * len(metrics_columns), description=['metrics_table columns data'] * len(metrics_columns))) return units_metadata_dict @property def electrodegroup_metadata(self): electrodes_group_metadata_dict = self._initialize_container_dict( 'ElectrodeGroup', default_value=[]) for ii in range(len(self.probe_metadata['Probes'])): try: location_str = self.probe_metadata['Probes'][ii][ 'trajectory_estimate'][0]['coordinate_system'] except: location_str = 'None' electrodes_group_metadata_dict['ElectrodeGroup'].extend( self._get_dynamictable_array( name=[self.probe_metadata['Probes'][ii]['name']], description=[ 'model {}'.format( self.probe_metadata['Probes'][ii]['model']) ], device=[self.device_metadata['Device'][0]['name']], location=[ 'Mouse CoordinateSystem:{}'.format(location_str) ])) return electrodes_group_metadata_dict @property def electrodetable_metadata(self): electrodes_objects = ['channels'] electrodes_table_metadata_dict = self._initialize_container_dict( 'ElectrodeTable') current_electrodes_objects = self._get_current_object_names( electrodes_objects) for i in current_electrodes_objects: electrodes_table_metadata_dict = self._get_dynamictable_object( self.dataset_details.copy(), 'channels', 'ElectrodeTable', default_colnames_dict=dict(group='probes', x='localCoordinates', y='localCoordinates')) return electrodes_table_metadata_dict @property def ecephys_metadata(self): 
ecephys_objects = [ 'templates', '_iblqc_ephysTimeRms', '_iblqc_ephysSpectralDensity' ] container_object_names = [ 'SpikeEventSeries', 'ElectricalSeries', 'Spectrum' ] custom_attrs_objects = [['waveforms'], ['rms'], ['power']] ecephys_container = self._initialize_container_dict('Ecephys') kwargs = dict() for i, j, k in zip(ecephys_objects, container_object_names, custom_attrs_objects): current_ecephys_objects = self._get_current_object_names([i]) if current_ecephys_objects: if j == 'Spectrum': kwargs = dict( name=i, power='_iblqc_ephysSpectralDensity.power', frequencies='_iblqc_ephysSpectralDensity.freqs', timestamps=None) ecephys_container['Ecephys'].update( self._get_timeseries_object(self.dataset_details.copy(), i, j, custom_attrs=k, **kwargs)) else: warnings.warn(f'could not find {i} data in eid {self.eid}') return ecephys_container @property def acquisition_metadata(self): acquisition_objects = [ 'ephysData', '_iblrig_Camera', '_iblmic_audioSpectrogram' ] container_name_objects = [ 'ElectricalSeries', 'ImageSeries', 'DecompositionSeries' ] custom_attrs_objects = [['raw.nidq', 'raw.ap', 'raw.lf'], ['raw'], ['power']] acquisition_container = self._initialize_container_dict('Acquisition') current_acquisition_objects = self._get_current_object_names( acquisition_objects) idx = [ no for no, i in enumerate(acquisition_objects) if i in current_acquisition_objects ] current_container_name_objects = [ container_name_objects[i] for i in idx ] current_custom_attrs_objects = [custom_attrs_objects[i] for i in idx] kwargs = dict() for i, j, k in zip(current_acquisition_objects, current_container_name_objects, current_custom_attrs_objects): if j == 'DecompositionSeries': kwargs = dict(name=i, metric='power', bands='_iblmic_audioSpectrogram.frequencies') acquisition_container['Acquisition'].update( self._get_timeseries_object(self.dataset_details.copy(), i, j, custom_attrs=k, **kwargs)) return acquisition_container @property def ophys_metadata(self): raise NotImplementedError 
@property
def icephys_metadata(self):
    """Intracellular ephys metadata: not implemented, always raises NotImplementedError."""
    raise NotImplementedError


@property
def scratch_metadata(self):
    """Scratch metadata: not implemented, always raises NotImplementedError."""
    # this can be used to add further details about subject, lab,
    raise NotImplementedError


@property
def complete_metadata(self):
    """
    Merge all per-section metadata properties into a single dictionary.

    The dictionaries returned by the individual ``*_metadata`` properties are
    unpacked at the top level in the order listed below, so if two of them
    share a key the later one wins. The ecephys, device and electrode group
    sections are nested together under the 'Ecephys' key; 'Ophys' and
    'Icephys' are always present as empty dictionaries.

    Returns
    -------
    dict
    """
    metafile_dict = {
        **self.eid_metadata,
        **self.probe_metadata,
        **self.nwbfile_metadata,
        **self.sessions_metadata,
        **self.subject_metadata,
        **self.behavior_metadata,
        **self.trials_metadata,
        **self.stimulus_metadata,
        **self.units_metadata,
        **self.electrodetable_metadata,
        'Ecephys': {
            **self.ecephys_metadata,
            **self.device_metadata,
            **self.electrodegroup_metadata,
        },
        'Ophys': dict(),
        'Icephys': dict(),
        **self.acquisition_metadata
    }
    return metafile_dict


def write_metadata(self, fileloc, savetype=None):
    """
    Write ``self.complete_metadata`` to a json or yaml file.

    Parameters
    ----------
    fileloc: str or Path
        destination file; its suffix selects the format when savetype is None
    savetype: str, optional
        expected suffix of fileloc: '.json', '.yaml' or '.yml'

    Raises
    ------
    ValueError
        if savetype is given and differs from the suffix of fileloc, or if the
        resulting type is not one of '.json', '.yaml', '.yml'
    """
    suffix = Path(fileloc).suffix
    if savetype is None:
        savetype = suffix
    elif suffix != savetype:
        raise ValueError(f'{fileloc} should be of type {savetype}')
    # Fail before building the metadata: previously an unsupported type fell
    # through both branches and reported success without writing anything.
    if savetype not in ('.json', '.yaml', '.yml'):
        raise ValueError(
            f'unsupported file type {savetype!r} for {fileloc}: '
            f'expected .json, .yaml or .yml')
    full_metadata = self.complete_metadata
    if savetype == '.json':
        # datetime objects are not json serialisable: store them as strings
        for section, field in (('NWBFile', 'session_start_time'),
                               ('IBLSubject', 'date_of_birth')):
            full_metadata[section][field] = \
                full_metadata[section][field].strftime('%Y-%m-%dT%X')
        with open(fileloc, 'w') as f:
            json.dump(full_metadata, f, indent=2)
    else:
        with open(fileloc, 'w') as f:
            yaml.dump(full_metadata, f, default_flow_style=False)
    print(f'data written in {fileloc}')
a, b = bb.singlecell.calculate_peths(spikes, clusters, quality.index[quality], times)
print(time.time() - start)

# keep only the depths of the units selected by the quality mask
depths = depths[quality]
"""vals, indizes = np.unique(clusters, return_index=True) clusts = [clusters[i] for i in sorted(indizes)] depths = depths[np.argsort(np.flip(clusts))] # interesting results, weirdly enough"""

perps = [5, 8]
np.random.seed(4)
# one t-SNE embedding and one saved figure per perplexity value
for perplexity in perps:
    embedding = TSNE(perplexity=perplexity).fit_transform(a.means)
    xs, ys = embedding[:, 0], embedding[:, 1]
    plt.scatter(xs, ys, c=depths)
    title = "good sort Mouse {} Perplexity {}".format(
        one.list(eid, 'subject'), perplexity)
    plt.title(title)
    plt.savefig('../../figures/' + title + '.png')
    plt.close()

quit()


def split(d, l):
    """
    Group the items of d by the label found at the same position in l.

    :param d: iterable of data items
    :param l: iterable of hashable labels, paired with d position by position
    :return: list of lists, one per distinct label, ordered by the first
        appearance of each label; items keep their original order
    """
    groups = {}
    for item, key in zip(d, l):
        groups.setdefault(key, []).append(item)
    return list(groups.values())
class Patcher(abc.ABC):
    """
    Base class to upload, register and delete datasets on FlatIron and Alyx.

    Subclasses implement the transport through `_scp` (copy a file) and `_rm`
    (remove a file). Both must return a sequence whose first element is a
    status code, 0 meaning success.
    """

    def __init__(self, one=None):
        # one object
        self.one = ONE() if one is None else one

    @staticmethod
    def _resolve_dset_id(path, dset_id=None):
        """
        Returns dset_id when provided, otherwise the second-to-last dot-separated
        part of the file name if it is a valid uuid string, otherwise None.
        """
        if dset_id is not None:
            return dset_id
        parts = Path(path).name.split('.')
        if len(parts) < 2:
            return None
        candidate = parts[-2]
        return candidate if alf.io.is_uuid_string(candidate) else None

    def patch_dataset(self, path, dset_id=None, dry=False):
        """
        Uploads a dataset from an arbitrary location to FlatIron.
        On success the hash, file size and version of the Alyx dataset record are updated.
        :param path: full path of the file to upload
        :param dset_id: dataset uuid. If None, it is read from the file name
        :param dry: if True, nothing is updated on Alyx
        :return:
        """
        path = Path(path)
        # resolve the id here as well: it is needed for the Alyx update below and
        # _patch_dataset only returns the copy status
        dset_id = self._resolve_dset_id(path, dset_id)
        status = self._patch_dataset(path, dset_id=dset_id, dry=dry)
        if not dry and status == 0:
            self.one.alyx.rest('datasets', 'partial_update', id=dset_id,
                               data={'hash': md5(path),
                                     'file_size': path.stat().st_size,
                                     'version': version.ibllib()}
                               )

    def _patch_dataset(self, path, dset_id=None, dry=False, ftp=False):
        """
        Private method that copies the file to its FlatIron location without
        updating the Alyx dataset record.
        :param path: full path of the file to upload
        :param dset_id: dataset uuid. If None, it is read from the file name
        :param dry: passed on to self._scp
        :param ftp: if True, a Windows path is passed to self._scp unchanged
        :return: first element of the self._scp return value
        """
        path = Path(path)
        dset_id = self._resolve_dset_id(path, dset_id)
        assert dset_id, f'could not determine the dataset id of {path}'
        assert alf.io.is_uuid_string(dset_id), f'{dset_id} is not a valid uuid'
        assert path.exists(), f'{path} does not exist'
        dset = self.one.alyx.rest('datasets', "read", id=dset_id)
        fr = next((fr for fr in dset['file_records']
                   if 'flatiron' in fr['data_repository']), None)
        if fr is None:
            # explicit error instead of a bare StopIteration escaping from next()
            raise ValueError(f'dataset {dset_id} has no file record on a flatiron repository')
        remote_path = Path(fr['data_repository_path']).joinpath(fr['relative_path'])
        remote_path = alf.io.add_uuid_string(remote_path, dset_id).as_posix()
        if remote_path.startswith('/'):
            full_remote_path = PurePosixPath(FLATIRON_MOUNT + remote_path)
        else:
            full_remote_path = PurePosixPath(FLATIRON_MOUNT, remote_path)
        if isinstance(path, WindowsPath) and not ftp:
            # On Windows replace drive map with Globus uri, e.g. C:/ -> /~/C/
            path = '/~/' + path.as_posix().replace(':', '')
        status = self._scp(path, full_remote_path, dry=dry)[0]
        return status

    def register_dataset(self, file_list, **kwargs):
        """
        Registers a set of files belonging to a session only on the server
        :param file_list: (list of pathlib.Path)
        :param created_by: (string) name of user in Alyx (defaults to 'root')
        :param repository: optional: (string) name of the server repository in Alyx
        :param versions: optional (list of strings): versions tags (defaults to ibllib version)
        :param dry: (bool) False by default
        :return:
        """
        return register_dataset(file_list, one=self.one, server_only=True, **kwargs)

    def create_dataset(self, file_list, repository=None, created_by='root', dry=False, ftp=False):
        """
        Creates a new dataset on FlatIron and uploads it from arbitrary location.
        Rules for creation/patching are the same that apply for registration via Alyx
        as this uses the registration endpoint to get the dataset.
        An existing file (same session and path relative to session) will be patched.
        :param file_list: full file path. Must be within an ALF session folder
         (subject/date/number). Can also be a list of full file paths belonging to the
         same session.
        :param repository: Alyx server repository name
        :param created_by: alyx username for the dataset (optional, defaults to root)
        :param dry: if True, the files are only registered with dry=True and None is returned
        :param ftp: flag for case when using ftppatcher. Don't adjust windows path in
         _patch_dataset when ftp=True
        :return: the registrations response, a list of dataset records
        """
        # first register the file
        if isinstance(file_list, (str, Path)):
            file_list = [Path(file_list)]
        elif not isinstance(file_list, list):
            file_list = list(file_list)
        assert len({alf.io.get_session_path(f) for f in file_list}) == 1
        assert all(Path(f).exists() for f in file_list)
        response = self.register_dataset(file_list, created_by=created_by,
                                         repository=repository, dry=dry)
        if dry:
            return
        # from the dataset info, set flatIron flag to exists=True
        for p, d in zip(file_list, response):
            self._patch_dataset(p, dset_id=d['id'], dry=dry, ftp=ftp)
        return response

    def delete_dataset(self, dset_id, dry=False):
        """
        Deletes a single dataset from the Flatiron and Alyx database.
        This does not remove the dataset from local servers.
        The Alyx record is deleted only if at least one FlatIron file was found
        and every removal returned status 0.
        :param dset_id: dataset uuid, or a dataset record (dict) with 'url' and 'file_records'
        :param dry: passed on to self._rm; if True the Alyx record is not deleted
        :return:
        """
        if isinstance(dset_id, dict):
            dset = dset_id
            dset_id = dset['url'][-36:]
        else:
            dset = self.one.alyx.rest('datasets', "read", id=dset_id)
        assert dset
        # collect the statuses so that a dataset without any FlatIron record no longer
        # reaches the status check with an unassigned variable
        statuses = []
        for fr in dset['file_records']:
            if 'flatiron' not in fr['data_repository']:
                continue
            flatiron_path = Path(FLATIRON_MOUNT).joinpath(fr['data_repository_path'][1:],
                                                          fr['relative_path'])
            flatiron_path = alf.io.add_uuid_string(flatiron_path, dset_id)
            statuses.append(self._rm(flatiron_path, dry=dry)[0])
        if not dry and statuses and all(status == 0 for status in statuses):
            self.one.alyx.rest('datasets', 'delete', id=dset_id)

    def delete_session_datasets(self, eid, dry=True):
        """
        Deletes all datasets attached to the session from database and flatiron but leaves
        the session on the database.
        Useful for a full re-extraction
        Currently always raises NotImplementedError once the session has been read.
        """
        ses_details = self.one.alyx.rest('sessions', 'read', id=eid)
        raise NotImplementedError("Code below only removes existing files. Need to search "
                                  "for datasets in a better way")
        # NOTE: everything below is unreachable as long as the raise above is in place
        # first delete attached datasets from the database
        dataset_details = self.one.list(eid, details=True)
        for n in range(len(dataset_details.dataset_id)):
            print(dataset_details.dataset_id[n])
            if dry:
                continue
            try:
                self.one.alyx.rest('datasets', 'delete', id=dataset_details.dataset_id[n])
            except Exception as e:
                print(e)
        # then delete the session folder from flatiron
        flatiron_path = Path(FLATIRON_MOUNT).joinpath(ses_details['lab'],
                                                      'public',
                                                      'projects',
                                                      ses_details['project'],
                                                      'ALF',
                                                      ses_details['subject'],
                                                      ses_details['start_time'][:10],
                                                      str(ses_details['number']).zfill(3))
        # alternative location:
        # flatiron_path = Path(FLATIRON_MOUNT).joinpath(ses_details['lab'],
        #                                               'Subjects',
        #                                               ses_details['subject'],
        #                                               ses_details['start_time'][:10],
        #                                               str(ses_details['number']).zfill(3))
        cmd = f"ssh -p {FLATIRON_PORT} {FLATIRON_USER}@{FLATIRON_HOST} rm -fR {flatiron_path}"
        print(cmd)

    @abc.abstractmethod
    def _scp(self, *args, **kwargs):
        """Copies a file to FlatIron; the first element of the return value is the status."""
        pass

    @abc.abstractmethod
    def _rm(self, *args, **kwargs):
        """Removes a file from FlatIron; the first element of the return value is the status."""
        pass
'38d95489-2e82-412a-8c1a-c5377b5f1555', '4153bd83-2168-4bd4-a15c-f7e82f3f73fb', '614e1937-4b24-4ad3-9055-c8253d089919' ] names_n_counts = [] names = [] all_data = [] for i, (eid, probe) in enumerate(zip(eids, probes)): if eid not in good_eids: print('skipped') continue if eid == '614e1937-4b24-4ad3-9055-c8253d089919': probe = 'probe00' print(eid) print(one.list(eid, 'subjects')) print(probe) channels = load_channel_locations(eid, one=one) # spikes, clusters = load_spike_sorting(eids[0], one=one) all_data.append(channels[probe]) names_n_counts.append(channels[probe].acronym) names.append(one.list(eid, 'subjects')) print(len(channels[probe].acronym)) fs = 18 for i, d in enumerate(names_n_counts): df = pd.DataFrame(data=d, columns=["acronym"]) ax = plt.subplot(2, 3, i + 1) plt.title(names[i], fontsize=fs)