def get_from_excel(data_path, extra_sheet=None):
    '''
    Load tracked data from an excel file and return a
    :class:`CellCluster` object.

    Parameters
    ----------
    data_path : str
        the path to the excel file
    extra_sheet : int or str, optional
        index or name of an additional sheet to read. When given and
        readable, its content is attached as `cellcluster.extra` and
        saved under the `'extra'` key of the store. If the sheet
        cannot be read, a message is printed and loading continues.

    Returns
    -------
    cellcluster : a :class:`CellCluster` instance
        the container class for the tracking

    Notes
    -----
    The excel file should follow the structure of
    `excel_trajs_example.xlsx` in the project's `data` directory:
    the first sheet holds the trajectories (with at least `t_stamp`
    and `label` columns) and the second sheet holds the metadata as
    `Name` / `Value` columns.

    The HDF5 store is placed next to the excel file, with the same
    name and a `.h5` extension.
    '''

    ### Read the data
    ### Parsing excel files tends to add NaNs to the data; drop them
    ### before the integer cast, which cannot represent NaN
    trajs = pd.read_excel(data_path, 0).dropna()
    ### builtin int: the `np.int` alias was removed from NumPy
    trajs['t_stamp'] = trajs['t_stamp'].astype(int)
    trajs['label'] = trajs['label'].astype(int)
    trajs.set_index(['t_stamp', 'label'], inplace=True)

    ### The Trajectories class is a subclass of
    ### pandas DataFrame
    trajs = Trajectories(trajs.sort_index())

    metadata = pd.read_excel(data_path, 1)
    metadata = dict(zip(metadata['Name'], metadata['Value']))
    metadata['FileName'] = data_path

    ### Swap a short trailing extension for '.h5', otherwise append it.
    ### splitext only touches the last component, so dots elsewhere in
    ### the path are preserved. data_path already carries its directory,
    ### so it must not be joined with dirname(data_path) a second time.
    root, ext = os.path.splitext(data_path)
    if 0 < len(ext) <= 6:
        store_path = root + '.h5'
    else:
        store_path = data_path + '.h5'

    ### The ObjectsIO class
    objectsio = ObjectsIO(metadata=metadata, store_path=store_path)
    cellcluster = CellCluster(objectsio=objectsio)
    cellcluster.trajs = trajs
    cellcluster.oio['trajs'] = trajs

    if extra_sheet is not None:
        ### Best effort: only a failure to read the sheet is reported,
        ### errors while storing the data are not masked
        try:
            extra = pd.read_excel(data_path, extra_sheet)
        except Exception:
            print('Extra data from sheet {} not found in the file {}'.format(extra_sheet, data_path))
        else:
            cellcluster.extra = extra
            cellcluster.oio['extra'] = extra
    return cellcluster
def load_multiple_excel(data_path, extra_sheet=None):
    '''
    Load several trajectory sheets from a single excel file.

    The last sheet of the workbook holds the shared metadata as
    `Name` / `Value` columns. Its `FileName` entry is a comma
    separated list of names (spaces are removed); the i-th name is
    associated with the i-th sheet of the workbook, which must
    contain at least the `t_stamp` and `label` columns.

    Parameters
    ----------
    data_path : str
        the path to the excel file
    extra_sheet : optional
        currently unused, kept for signature compatibility with
        :func:`get_from_excel`

    Returns
    -------
    clusters : dict
        maps each name listed in `FileName` to a
        :class:`CellCluster` instance. Each cluster gets its own HDF5
        store, named after it and located in the directory of
        `data_path`.
    '''
    data_dir = os.path.dirname(data_path)
    clusters = {}

    ### Open the workbook once and close it when done
    with pd.ExcelFile(data_path) as xlsx_file:
        ### sheet_names is available whatever the reader engine is
        last_sheet = len(xlsx_file.sheet_names) - 1
        global_metadata = pd.read_excel(xlsx_file, last_sheet)
        global_metadata = dict(zip(global_metadata['Name'],
                                   global_metadata['Value']))
        global_metadata['FileName'] = global_metadata['FileName'].replace(' ', '')

        for i, name in enumerate(global_metadata['FileName'].split(',')):
            ### Read the data
            ### Drop the NaN rows before the integer cast, which
            ### cannot represent NaN
            trajs = pd.read_excel(xlsx_file, i).dropna()
            ### builtin int: the `np.int` alias was removed from NumPy
            trajs['t_stamp'] = trajs['t_stamp'].astype(int)
            trajs['label'] = trajs['label'].astype(int)
            trajs.set_index(['t_stamp', 'label'], inplace=True)
            ### Sorted index, as in get_from_excel
            trajs = Trajectories(trajs.sort_index())

            metadata = global_metadata.copy()
            file_name = os.path.join(data_dir, name)
            metadata['FileName'] = file_name

            ### Swap a short trailing extension for '.h5', otherwise
            ### append it. file_name already contains data_dir, so it
            ### is not joined with it a second time.
            root, ext = os.path.splitext(file_name)
            if 0 < len(ext) <= 6:
                store_path = root + '.h5'
            else:
                store_path = file_name + '.h5'

            ### The ObjectsIO class
            objectsio = ObjectsIO(metadata=metadata, store_path=store_path)
            cellcluster = CellCluster(objectsio=objectsio)
            cellcluster.trajs = trajs
            cellcluster.oio['trajs'] = trajs
            clusters[name] = cellcluster
    return clusters