def test_no_wells():
    with raises(ConfigError, match="No wells defined"):
        bio96.load(DIR / "empty.toml")

    # The following examples actually trigger a different (and more specific)
    # exception, but it still has "No wells defined" in the message.
    half_specified_layouts = [
            "row_without_col.toml",
            "irow_without_col.toml",
            "col_without_row.toml",
            "icol_without_row.toml",
    ]
    for layout in half_specified_layouts:
        with raises(ConfigError, match="No wells defined"):
            bio96.load(DIR / layout)
def main():
    import docopt
    try:
        args = docopt.docopt(__doc__)
        toml_path = Path(args['<toml>'])
        df = bio96.load(toml_path)

        # Accept either a colorcet palette name or a matplotlib one.
        cmap = colorcet.cm.get(args['--color'], plt.get_cmap(args['--color']))

        # Unless told otherwise, fork into the background so the GUI doesn't
        # tie up the user's shell.  (Skipped when writing to a file, since in
        # that case the command finishes immediately anyway.)
        wants_gui = not args['--output']
        if wants_gui and not args['--foreground']:
            if os.fork() != 0:
                sys.exit()

        fig = plot_layout(df, args['<attr>'], cmap=cmap)

        if not wants_gui:
            out_path = args['--output'].replace('$', toml_path.stem)
            fig.savefig(out_path)
            print("Layout written to:", out_path)
        else:
            title = str(toml_path)
            if args['<attr>']:
                title += f' [{", ".join(args["<attr>"])}]'
            fig.canvas.set_window_title(title)
            plt.show()

    except CliError as err:
        print(err)
    except ConfigError as err:
        # Annotate the error with the file it came from before displaying it.
        err.toml_path = toml_path
        print(err)
def test_two_plates():
    df = bio96.load(
            DIR / 'two_plates.toml',
            data_loader=pd.read_csv,
            merge_cols={'well': 'Well'},
    )

    # Everything but the path, plate, `x`, and `Data` columns is the same for
    # both plates.
    common = dict(
            well='A1', Well='A1', well0='A01',
            row='A', col='1',
            row_i=0, col_j=0,
    )
    assert row(df, 'plate == "a"') == dict(
            common,
            path=DIR / 'two_plates_a.csv',
            plate='a', x=1, Data=0,
    )
    assert row(df, 'plate == "b"') == dict(
            common,
            path=DIR / 'two_plates_b.csv',
            plate='b', x=2, Data=1,
    )
def load_cq(toml_path):
    """
    Load Cq values for the experiment described by the given TOML file.

    The path to the data directory is inferred from the TOML file name (see
    `path_guess` below), and the Cq values are read from the "Quantification
    Cq Results.csv" file within that directory.
    """

    def parse_csv(data_dir):
        # BUG FIX (naming): this parameter used to be called `toml_path`,
        # shadowing the outer argument.  It actually receives the *data
        # directory* guessed by `path_guess='{0.stem}/'`, not the TOML file.
        df = pd.read_csv(data_dir / 'Quantification Cq Results.csv')
        df = df.rename({'Well': 'well0', 'Cq': 'cq'}, axis='columns')
        return df[['well0', 'cq']]

    return bio96.load(
            toml_path, parse_csv,
            merge_cols={'well0': 'well0'},
            path_guess='{0.stem}/',
    )
def test_one_plate():
    labels = bio96.load(DIR / 'one_plate.toml')
    expected = dict(
            path=DIR / 'one_plate.csv',
            plate='a',
            well='A1', well0='A01',
            row='A', col='1',
            row_i=0, col_j=0,
            x=1,
    )
    assert row(labels, 'well == "A1"') == expected
def load_melt_data(toml_path):
    """Load melt-curve data for the experiment described by the TOML file."""

    def melt_from_xlsx(xlsx_path):
        # The instrument export has 43 rows of boilerplate before the header.
        return pd.read_excel(
                xlsx_path,
                sheet_name='Melt Curve Raw Data',
                header=43,
        )

    df, opt = bio96.load(toml_path, melt_from_xlsx, {'well': 'Well Position'})
    return drop_outliers(df)
def load_pcr_data(toml_path):
    """Load amplification data for the experiment described by the TOML file."""

    def pcr_from_xlsx(xlsx_path):
        # The instrument export has 43 rows of boilerplate before the header.
        return pd.read_excel(
                xlsx_path,
                sheet_name='Amplification Data',
                header=43,
        )

    df, opt = bio96.load(toml_path, pcr_from_xlsx, {'well': 'Well Position'})
    return drop_outliers(df)
def load_ct_data(toml_path):
    """Load Ct results for the experiment described by the TOML file."""

    def ct_from_xlsx(xlsx_path):
        # The instrument export has 43 rows of boilerplate before the header.
        ct = pd.read_excel(xlsx_path, sheet_name='Results', header=43)
        # Discard mostly-empty rows, then entirely-empty columns.
        ct = ct.dropna(thresh=3)
        return ct.dropna(axis='columns', how='all')

    df, opt = bio96.load(toml_path, ct_from_xlsx, {'well': 'Well Position'})
    return drop_outliers(df)
def load_data(toml_path):
    """Load the 600 nm kinetic reads for the experiment described by the TOML file."""

    def load_biotek(path):
        return plate_reader.BiotekExperiment(path).kinetic[600]

    df, options = bio96.load(
            toml_path, load_biotek, {'well': 'well'},
            path_guess='{0.stem}.xlsx',
    )

    # Report how many wells had no reading before dropping them.
    print(f"{df.read.isna().sum()}/{len(df)} data points discarded.")
    return df.dropna(subset=['read'])
def load(toml_path, query=None, aggregate=None, genes=None):
    """
    Load qPCR data for the experiment described by the given TOML file.

    The data directory is inferred from the TOML file name, and the format of
    the data within it (BioRad vs. Applied Biosystems) is inferred from the
    first line of the TOML file.

    Arguments:
        toml_path: Path to the TOML layout file.
        query: Optional pandas query string used to filter the observations.
        aggregate: Optional argument forwarded to `calc_cq()`.
        genes: Optional argument forwarded to `calc_Δcq()`.

    Returns:
        A `(df, options)` tuple, as returned by `bio96.load()`.
    """
    toml_path = Path(toml_path)

    def biorad(path):
        df = pd.read_csv(path / 'Quantification Cq Results.csv')
        df = df.rename({'Well': 'well0', 'Cq': 'cq'}, axis='columns')
        return df[['well0', 'cq']]

    def applied_biosystems(path):
        df = pd.read_excel(path, sheet_name='Results', header=43)
        df = df.dropna(thresh=3)
        # BUG FIX: without `axis='columns'`, rename() targets the index, so
        # the 'well'/'cq' columns were never created and the selection below
        # would raise a KeyError.
        df = df.rename({'Well Position': 'well', 'CT': 'cq'}, axis='columns')
        return df[['well', 'cq']]

    # The instrument is identified by a comment on the first line of the TOML
    # file.  BUG FIX: this used to call `f.read()`, which scanned the whole
    # file rather than just the first line.
    with toml_path.open() as f:
        first_line = f.readline().lower()

    if "applied biosystems" in first_line:
        load_unlabeled_cq = applied_biosystems
        # BUG FIX: a stray trailing comma used to make this a 1-tuple, which
        # bio96.load() cannot use as `merge_cols`.
        merge_cols = {'well': 'well'}
    else:  # biorad
        load_unlabeled_cq = biorad
        merge_cols = {'well0': 'well0'}

    df, options = bio96.load(
            toml_path, load_unlabeled_cq, merge_cols,
            path_guess='{0.stem}/',
    )

    if query:
        n0 = len(df)
        df = df.query(query)
        print(f"Kept {len(df)}/{n0} observations where {query}")

    if aggregate:
        df = calc_cq(df, aggregate)
    if genes:
        df = calc_Δcq(df, genes)

    return df, options
def test_concat():
    concat_cases = [
            ('one_concat.toml', 2),
            ('two_concats.toml', 3),
    ]
    for toml_name, n_wells in concat_cases:
        labels = bio96.load(DIR / toml_name)
        assert len(labels) == n_wells
        with raises(ConfigError, match="Did you mean to set `meta.path`?"):
            bio96.load(DIR / toml_name, path_required=True)

    # Should not raise.  It's ok that `just_concat.csv` doesn't exist, because
    # `just_concat.toml` doesn't specify any wells.
    labels = bio96.load(
            DIR / 'just_concat.toml',
            path_guess='{0.stem}.csv',
            path_required=True,
    )
    assert len(labels) == 1
def test_bad_args():
    # Doesn't make sense to specify `merge_cols` without `data_loader`:
    with raises(ValueError):
        bio96.load(DIR / 'two_plates.toml', merge_cols={})

    # Either side of `merge_cols` naming a non-existent column is an error.
    for bad_merge in [{'xxx': 'Well'}, {'well': 'xxx'}]:
        with raises(ValueError, match='xxx'):
            bio96.load(
                    DIR / 'two_plates.toml',
                    data_loader=pd.read_csv,
                    merge_cols=bad_merge,
            )
import pandas as pd

args = docopt.docopt(__doc__)

def df_from_path(path):
    """
    Load experimental data from the given path into a data frame.

    The returned frame must contain the column(s) named by the `merge_cols`
    argument to `bio96.load()` — here, "Well".

    Expect to write a different version of this function for every kind of
    data you work with.  Instruments that export ``*.xlsx`` files are the
    easy case (``pd.read_excel()`` handles them directly); for other formats
    you may find an existing parser library, or have to write one yourself.
    """
    return pd.read_excel(path)

df = bio96.load(args['<toml>'], df_from_path, {'well': 'Well'})

# The resulting data frame has one row per well, a column for every field in
# the TOML file, and additional columns for each kind of data found in the
# paths referenced by (or inferred from) the TOML file.  There are many ways
# to work with it; ``pd.DataFrame.groupby()`` — which selects subsets of the
# data based on one or more attributes — is particularly worth knowing about.
print(df)
def test_reasonably_complex():
    # A smoke test: just check that the expected number of wells come back.
    labels = bio96.load(DIR / 'reasonably_complex.toml')
    assert len(labels) == 32
def test_one_well():
    # Expected label rows, built up incrementally: bare labels, labels with an
    # inferred path, and labels merged with data.
    base = dict(
            well='A1', well0='A01',
            row='A', col='1',
            row_i=0, col_j=0,
            x=1, y=1,
    )
    csv_path = DIR / 'one_well_xy.csv'
    with_path = dict(base, path=csv_path)
    merged = dict(with_path, Well='A1', Data='xy')

    # Labels only, no path.
    labels = bio96.load(DIR / 'one_well_xy.toml')
    assert row(labels, 'well == "A1"') == base

    # Labels only, with a path guessed from the TOML file name.
    labels = bio96.load(DIR / 'one_well_xy.toml', path_guess='{0.stem}.csv')
    assert row(labels, 'well == "A1"') == with_path

    # Without a path guess, requiring a path (or loading data) must fail.
    with raises(ConfigError, match='one_well_xy.toml'):
        bio96.load(DIR / 'one_well_xy.toml', path_required=True)
    with raises(ConfigError, match='one_well_xy.toml'):
        bio96.load(DIR / 'one_well_xy.toml', data_loader=pd.read_csv)

    # Labels and data returned separately.
    labels, data = bio96.load(
            DIR / 'one_well_xy.toml',
            data_loader=pd.read_csv,
            path_guess='{0.stem}.csv',
    )
    assert row(labels, 'well == "A1"') == with_path
    assert row(data, 'Well == "A1"') == dict(
            Well='A1', path=csv_path, Data='xy')

    # Labels and data merged on explicitly given columns.
    df = bio96.load(
            DIR / 'one_well_xy.toml',
            data_loader=pd.read_csv,
            merge_cols={'well': 'Well'},
            path_guess='{0.stem}.csv',
    )
    assert row(df, 'well == "A1"') == merged

    # Labels and data merged on every shared column.
    df = bio96.load(
            DIR / 'one_well_xy.toml',
            data_loader=read_csv_and_rename,
            merge_cols=True,
            path_guess='{0.stem}.csv',
    )
    assert row(df, 'well == "A1"') == merged
def test_one_well_with_extras(extras_arg, expected):
    base = dict(
            well='A1', well0='A01',
            row='A', col='1',
            row_i=0, col_j=0,
            x=1, y=1,
    )
    csv_path = DIR / 'one_well_xy_extras.csv'
    merged = dict(base, path=csv_path, Well='A1', Data='xy')

    # Labels only.
    labels, extras = bio96.load(
            DIR / 'one_well_xy_extras.toml',
            extras=extras_arg,
    )
    assert extras == expected
    assert row(labels, 'well == "A1"') == base

    # Labels and data returned separately.
    labels, data, extras = bio96.load(
            DIR / 'one_well_xy_extras.toml',
            data_loader=pd.read_csv,
            path_guess='{0.stem}.csv',
            extras=extras_arg,
    )
    assert extras == expected
    assert row(labels, 'well == "A1"') == dict(base, path=csv_path)
    assert row(data, 'Well == "A1"') == dict(
            Well='A1', path=csv_path, Data='xy')

    # Labels and data merged on explicitly given columns.
    df, extras = bio96.load(
            DIR / 'one_well_xy_extras.toml',
            data_loader=pd.read_csv,
            merge_cols={'well': 'Well'},
            path_guess='{0.stem}.csv',
            extras=extras_arg,
    )
    assert extras == expected
    assert row(df, 'well == "A1"') == merged

    # Labels and data merged on every shared column.
    df, extras = bio96.load(
            DIR / 'one_well_xy_extras.toml',
            data_loader=read_csv_and_rename,
            merge_cols=True,
            path_guess='{0.stem}.csv',
            extras=extras_arg,
    )
    assert extras == expected
    assert row(df, 'well == "A1"') == merged
def get_data():
    """
    Load every metadata file named on the command line and post-process the
    resulting observations (zero-subtraction, unit conversion, per-enzyme
    normalization, and default GUI state columns).

    Reads the module-level `args` dict (docopt) and returns one concatenated
    data frame.
    """
    paths = args['<bio96_metadata>']

    # A single directory argument means: load every `*.data` file inside it.
    # Otherwise, each argument is itself a metadata file.
    if os.path.isdir(paths[0]):
        # BUG FIX: os.listdir()/os.path.join() used to be passed the whole
        # argument *list* (`paths`) rather than the directory (`paths[0]`),
        # which raises a TypeError.
        dataframes = [
                bio96.load(
                    os.path.join(paths[0], f),
                    load_dataframe, {'well': 'variable'})
                for f in os.listdir(paths[0])
                if f.endswith('.data')
        ]
    else:
        dataframes = [
                bio96.load(f, load_dataframe, {'well': 'variable'})
                for f in paths
        ]

    data = pd.concat(dataframes, ignore_index=True)
    data = data.dropna()
    raw = args['--raw']

    # Subtract the no-substrate (0 µM) well from every measurement.
    if args['--zero']:
        zero_df = data[data['conc_uM'] == 0]

        # Index the zero wells by (enzyme, date, replicate) for fast lookup.
        groupdict = {}
        for (enzyme, date, replicate), group in \
                zero_df.groupby(['enzyme', 'date', 'replicate']):
            groupdict \
                    .setdefault(enzyme, {}) \
                    .setdefault(date, {})[replicate] = group

        for index, row in data.iterrows():
            zdf = groupdict[row['enzyme']][row['date']][row['replicate']]
            zero = zdf.loc[zdf['Time'] == row['Time']]['value'].tolist()[0]
            # BUG FIX: `iterrows()` yields index *labels*, and after
            # `dropna()` the labels are no longer consecutive positions, so
            # the old `data.iloc[index] = row` write-back could hit the wrong
            # row (or raise).  Write the single changed cell by label instead.
            data.loc[index, 'value'] = row['value'] - zero

    # Convert the raw signal into product concentration (unless --raw).
    if raw:
        data['product'] = data['value']
    else:
        data['product'] = \
                data['units'] * data['value'] / data['conversion_factor']

    if args['--correction']:
        correction = float(args['--correction'])
        data['product'] = data['product'] * correction

    # Normalize by enzyme concentration when it's known (and not --raw).
    if 'enzyme_conc' in data and not raw:
        data['persecond'] = data['product'] / data['enzyme_conc']
    else:
        data['persecond'] = data['product']
    if 'enzyme_conc' not in data:
        data['enzyme_conc'] = 1

    # Default state for the interactive plots.
    data['clicked_linear'] = False
    data['clicked_kinetics'] = False
    data['slope'] = 1
    data['min_time'] = min(data['Time'])
    data['max_time'] = max(data['Time'])
    data['shown'] = True

    return data
def parse_labels():
    """Load the plate layout and index it by well."""
    return bio96.load('20181002_sgrna_qpcr.toml').set_index('well')
import bio96
import pandas as pd

# Where each primer combination sits in the various primer plates.
plate_primer_well = pd.read_csv(snakemake.params[0])

# The annotations for this experiment (well, plate, cell type, ...).
annotation_df = bio96.load(
    snakemake.input[0])[['primer_plate', 'well', 'plate', 'cell_type']]

# Combine plate and well information so each row can be matched against the
# primer-combination table.
annotation_df['plate_well'] = \
    annotation_df['primer_plate'] + "_" + annotation_df['well']

# Merge the two tables to attach the primer-combination info.
ann_primers = pd.merge(annotation_df, plate_primer_well, on='plate_well')
ann_primers['sample_name'] = ann_primers['plate'] + ann_primers['primer_combi']
# NOTE: a redundant self-assignment (`ann_primers['well'] = ann_primers['well']`)
# was removed here; it had no effect.

ann_file = ann_primers[['sample_name', 'cell_type', 'well']]
ann_file.to_csv(snakemake.output[0], index=False)