Example 1
import glob
import os
import pandas as pd
from full_analysis_tools import format_rectangles  # as imported in Example 2

# filePath_um (the directory of .pkl slice files) and norm_data are defined
# earlier in the source script and are not shown in this excerpt
dat_list = []
for file in glob.glob(os.path.join(filePath_um, '*.pkl')):
    dat_list.append(pd.read_pickle(file))
dat = pd.concat(dat_list, axis=1)
print(dat.head())

file_suffix = ''
if input('Scale each slice to between 0 and 1? y/n: ') == 'y':
    dat_scaled = norm_data(dat)
    dat = dat_scaled
    file_suffix += '_slices-scaled'
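
# norm_data itself is not shown in this excerpt; judging by the prompt above,
# it min-max scales each slice (column) to [0, 1]. A hypothetical version:
#   def norm_data(df):
#       return (df - df.min()) / (df.max() - df.min())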
dat = dat.dropna()
print(dat.shape)
print(dat.head())

r_dat_theta = format_rectangles(dat,
                                scale='standard',
                                theta_av=True,
                                rCol=dat.columns[0])

r_dat_rect = format_rectangles(dat, scale='standard', theta_av=False)

# %% calculate wedge data
wedge_path = '/Users/s1101153/OneDrive - University of Edinburgh/Files/OCP_working/droplet_stacks/63x/'

info_file = os.path.join(wedge_path, 'stack_info.csv')
save_file = os.path.join(wedge_path, 'wedges.pkl')

info_file_A = os.path.join(wedge_path, 'stack_info_2020-08-28_A.csv')
save_file_A = os.path.join(wedge_path, 'wedges_A.pkl')

info_file_B = os.path.join(wedge_path, 'stack_info_2020-08-28_B.csv')
save_file_B = os.path.join(wedge_path, 'wedges_B.pkl')
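
# The info/save pairs above suggest a compute-or-load pattern. A minimal
# sketch, assuming a hypothetical calc_wedges(info_file) helper standing in
# for the project's actual wedge-calculation routine:
if os.path.isfile(save_file):
    wedges = pd.read_pickle(save_file)  # reuse cached wedge data
else:
    wedges = calc_wedges(info_file)     # compute wedges from the stack info CSV
    wedges.to_pickle(save_file)         # cache the result for later runs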
Example 2
from full_analysis_tools import format_rectangles
from slice_tools import read_files
import pandas as pd
from sklearn import cluster
import numpy as np
import matplotlib.pyplot as pl

filePath = '/Users/s1101153/OneDrive - University of Edinburgh/Files/OCP_working/droplet_stacks/63x/rect_pickles'
imagePath = '/Users/s1101153/OneDrive - University of Edinburgh/Files/OCP_working/droplet_stacks/63x/final_images/ims_to_read/'

dat = read_files(filePath)
r_dat = format_rectangles(dat, scale='standard', theta_av=True)


def optimalK(data, nrefs=3, maxClusters=15):
    """
    FROM https://anaconda.org/milesgranger/gap-statistic/notebook
    Calculates KMeans optimal K using Gap Statistic from Tibshirani, Walther, Hastie
    Params:
        data: ndarray of shape (n_samples, n_features)
        nrefs: number of sample reference datasets to create
        maxClusters: Maximum number of clusters to test for
    Returns: (optimalK, resultsdf)
    """
    gaps = np.zeros((len(range(1, maxClusters)),))
    resultsdf = pd.DataFrame({'clusterCount': [], 'gap': []})
    for gap_index, k in enumerate(range(1, maxClusters)):

        # Holder for reference dispersion results
        refDisps = np.zeros(nrefs)
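
        # The function is truncated in this excerpt. A sketch of the remainder,
        # following the gap-statistic notebook cited in the docstring:
        for i in range(nrefs):
            # Fit KMeans to a uniform random reference of the same shape as the data
            randomReference = np.random.random_sample(size=data.shape)
            km = cluster.KMeans(n_clusters=k, n_init=10)
            km.fit(randomReference)
            refDisps[i] = km.inertia_

        # Fit KMeans to the actual data and compare dispersions on a log scale
        km = cluster.KMeans(n_clusters=k, n_init=10)
        km.fit(data)
        gap = np.log(np.mean(refDisps)) - np.log(km.inertia_)
        gaps[gap_index] = gap
        resultsdf.loc[len(resultsdf)] = {'clusterCount': k, 'gap': gap}

    # The optimal k maximises the gap (+1 because cluster counts start at 1)
    return gaps.argmax() + 1, resultsdf


# Hypothetical usage with the theta-averaged data prepared above:
# k_opt, gap_df = optimalK(r_dat.values, nrefs=3, maxClusters=15)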
Example 3
import glob
import os
import pandas as pd
from full_analysis_tools import format_rectangles  # as imported in Example 2

# As in Example 1, filePath_um and norm_data are defined earlier in the
# source script and are not shown in this excerpt
dat_list = []
for file in glob.glob(os.path.join(filePath_um, '*.pkl')):
    dat_list.append(pd.read_pickle(file))
dat = pd.concat(dat_list, axis=1)
rCol = dat.columns[0]
file_suffix = ''

if input('Scale each slice to between 0 and 1? y/n: ') == 'y':
    dat_scaled = norm_data(dat)
    dat = dat_scaled
    file_suffix += '_slices-scaled'
dat = dat.dropna()
print(dat.shape)
print(dat.describe())

if input('Use theta-averaged data? y/n: ') == 'y':
    r_dat = format_rectangles(dat, scale='standard', theta_av=True, rCol=rCol)
    file_suffix += '_th-av'
    print(r_dat.head())
else:
    r_dat = format_rectangles(dat, scale='standard', theta_av=False)
    file_suffix += '_rect'
    print(r_dat.head())
# %% calculate wedge data
wedge_path = '/Users/s1101153/OneDrive - University of Edinburgh/Files/OCP_working/droplet_stacks/63x/'

data_tag = input('Which data to include? all/nice/oneA/oneB: ')
if data_tag == 'all':
    info_file = os.path.join(wedge_path, 'stack_info.csv')
    save_file = os.path.join(wedge_path, 'wedges_all.pkl')
    file_suffix += '_all'
elif data_tag == 'nice':