예제 #1
0
파일: io.py 프로젝트: glyg/cell-tracker
def get_from_excel(data_path, extra_sheet=None):
    '''
    Read tracked data from an excel file and return a
    :class:`CellCluster` object.

    Parameters
    ----------

    data_path : str
        the path to the excel file
    extra_sheet : int or str, optional
        index or name of an additional sheet, passed as is to
        `pd.read_excel`. When given and readable, its content is set as
        `cellcluster.extra` and stored under the `'extra'` key of
        `cellcluster.oio`. If the sheet cannot be read, a message is
        printed and the cluster is returned without extra data.

    Returns
    -------

    cellcluster : a :class:`CellCluster` instance
         the container class for the tracking

    Notes
    -----

    The excel file should follow the structure of `excel_trajs_example.xlsx`
    in the project's `data` directory: the first sheet holds the
    trajectories (with `t_stamp` and `label` columns), the second sheet
    holds the metadata as `Name` / `Value` columns.

    The path handed to :class:`ObjectsIO` as `store_path` is `data_path`
    with its extension replaced by `.h5` (or with `.h5` appended when
    `data_path` has no short extension).
    '''

    ### Read the data
    trajs = pd.read_excel(data_path, 0)
    ### The builtin `int` is what the removed `np.int` alias pointed to
    trajs['t_stamp'] = trajs['t_stamp'].astype(int)
    trajs['label'] = trajs['label'].astype(int)
    trajs.set_index(['t_stamp', 'label'],
                    inplace=True)

    ### The Trajectories class is a subclass of
    ### pandas DataFrame
    ### Parsing excel files tends to add NaNs to the data
    trajs = Trajectories(trajs.dropna().sort_index())

    metadata = pd.read_excel(data_path, 1)
    metadata = dict(zip(metadata['Name'], metadata['Value']))
    metadata['FileName'] = data_path

    ### Swap the extension for '.h5'. As before, only a suffix of at most
    ### 6 characters (dot included) is considered to be an extension.
    ### `splitext` only touches the last component, so dots elsewhere in
    ### the path (e.g. a leading './') are preserved.
    root, ext = os.path.splitext(data_path)
    if 0 < len(ext) <= 6:
        store_path = root + '.h5'
    else:
        store_path = data_path + '.h5'
    ### `store_path` already carries the directory of `data_path`;
    ### prepending it once more would duplicate it for relative paths.

    ### The ObjectsIO class
    objectsio = ObjectsIO(metadata=metadata, store_path=store_path)
    cellcluster = CellCluster(objectsio=objectsio)
    cellcluster.trajs = trajs
    cellcluster.oio['trajs'] = trajs

    if extra_sheet is not None:
        try:
            extra = pd.read_excel(data_path, extra_sheet)
        except Exception:
            ### The exception type raised for a missing sheet depends on
            ### the excel engine and on the pandas version, hence the
            ### broad clause. Unlike a bare `except`, it lets
            ### KeyboardInterrupt and SystemExit through.
            print('Extra data from sheet {} not found in the file {}'.format(extra_sheet, data_path))
        else:
            cellcluster.extra = extra
            cellcluster.oio['extra'] = extra
    return cellcluster
예제 #2
0
파일: io.py 프로젝트: glyg/cell-tracker
def load_multiple_excel(data_path, extra_sheet=None):
    '''
    Read several sets of tracked data from a single excel file and
    return one :class:`CellCluster` per set.

    Parameters
    ----------

    data_path : str
        the path to the excel file
    extra_sheet : optional
        accepted for signature parity with `get_from_excel`;
        this function does not use it.

    Returns
    -------

    clusters : dict
        maps each name listed in the `FileName` metadata entry to the
        :class:`CellCluster` instance built from the corresponding sheet

    Notes
    -----

    The last sheet of the file holds the metadata shared by all the
    clusters, as `Name` / `Value` columns. Its `FileName` entry is a
    comma separated list of names (all spaces are removed); the i-th name
    is associated with the trajectories found in the i-th sheet.

    For each cluster, `metadata['FileName']` is the name joined to the
    directory of `data_path`, and the path handed to :class:`ObjectsIO`
    as `store_path` is that same path with its extension replaced by
    `.h5` (or with `.h5` appended when there is no short extension).
    '''

    data_dir = os.path.dirname(data_path)
    clusters = {}

    ### Open the workbook once, reuse it for every sheet, and make sure
    ### the handle is closed when done
    with pd.ExcelFile(data_path) as xlsx_file:

        ### `sheet_names` is available whatever the excel engine,
        ### unlike the xlrd specific `book.nsheets`
        lastsheet = len(xlsx_file.sheet_names) - 1
        global_metadata = pd.read_excel(xlsx_file, lastsheet)
        global_metadata = dict(zip(global_metadata['Name'],
                                   global_metadata['Value']))

        global_metadata['FileName'] = global_metadata['FileName'].replace(' ', '')
        for i, name in enumerate(global_metadata['FileName'].split(',')):

            ### Read the data
            trajs = pd.read_excel(xlsx_file, i)
            ### The builtin `int` is what the removed `np.int` alias
            ### pointed to
            trajs['t_stamp'] = trajs['t_stamp'].astype(int)
            trajs['label'] = trajs['label'].astype(int)
            trajs.set_index(['t_stamp', 'label'],
                            inplace=True)
            trajs = Trajectories(trajs.dropna())

            metadata = global_metadata.copy()
            metadata['FileName'] = os.path.join(data_dir, name)

            ### Swap the extension for '.h5'. As before, only a suffix of
            ### at most 6 characters (dot included) is considered to be
            ### an extension. `splitext` only touches the last component,
            ### so dots elsewhere in the path are preserved.
            root, ext = os.path.splitext(metadata['FileName'])
            if 0 < len(ext) <= 6:
                store_path = root + '.h5'
            else:
                store_path = metadata['FileName'] + '.h5'
            ### `store_path` already starts with `data_dir`; prepending
            ### it once more would duplicate it for relative paths.

            ### The ObjectsIO class
            objectsio = ObjectsIO(metadata=metadata, store_path=store_path)
            cellcluster = CellCluster(objectsio=objectsio)
            cellcluster.trajs = trajs
            cellcluster.oio['trajs'] = trajs
            clusters[name] = cellcluster

    return clusters