def volume_to_mmol_wrapper(vol_df, rdict, experiment):
    """Build the mmol dataframe that corresponds to a dataframe of volumes.

    Every column of ``vol_df`` is handed to ``calcs.mmolextension`` together
    with the reagent number parsed from the column header, and the per-column
    results are joined side by side.

    :param vol_df: dataframe whose column headers look like 'Reagent2 (ul)'
    :param rdict: passed through unchanged to ``calcs.mmolextension``
    :param experiment: passed through unchanged to ``calcs.mmolextension``
    :return: column-wise concatenation of the ``calcs.mmolextension`` results
        (an empty dataframe when ``vol_df`` has no columns)
    """
    combined = pd.DataFrame()
    for header in vol_df.columns:
        # 'Reagent2 (ul)' to give 2
        reagent_number = int(get_reagent_number_as_string(header))
        reagent_mmol = calcs.mmolextension(vol_df[header], rdict, experiment,
                                           reagent_number)
        combined = pd.concat([combined, reagent_mmol], axis=1)
    return combined
def default_statedataframe(rxndict, expoverview, vollimits, rdict, experiment):
    """Generate a state set from the volume constraints of the experimental system.

    For every portion of ``expoverview`` a grid of candidate volumes is built for
    each reagent (0 up to the portion's maximum volume, stepping by
    ``config.volspacing``).  Rows whose summed volume falls outside the portion's
    ``[min, max]`` limits are discarded.  The surviving rows of all portions are
    then combined with each other (Cartesian product), de-duplicated, and passed
    column by column to ``calcs.mmolextension``.

    :param rxndict: not used by this function; kept for interface compatibility
    :param expoverview: sequence of portions, each an iterable of reagent
        identifiers
    :param vollimits: per-portion volume limits, indexed like ``expoverview``;
        element ``[0]`` is the minimum and ``[1]`` the maximum total volume
    :param rdict: passed through unchanged to ``calcs.mmolextension``
    :param experiment: passed through unchanged to ``calcs.mmolextension``
    :return: tuple ``(prdf, finalmmoldf)`` -- the dataframe of volumes (one
        'Reagent<N> (ul)' column per reagent) and the column-wise concatenation
        of the ``calcs.mmolextension`` results
    :raises ValueError: if no volume combination satisfies the limits
    """
    fullreagentnamelist = []
    fullvollist = []
    for portionnum, portion in enumerate(expoverview):
        reagentnamelist = ['Reagent%s (ul)' % reagent for reagent in portion]
        fullreagentnamelist.extend(reagentnamelist)

        # Take the maximum volume limit and generate, for each reagent, all
        # possible volumes from 0 to the max.
        reagentvols = [
            range(0, vollimits[portionnum][1] + 1, config.volspacing)
            for _ in reagentnamelist
        ]

        # generate permutation of all of the volumes
        testdf = pd.DataFrame(list(itertools.product(*reagentvols)))

        # organize dataframe with the sums of the generated numbers
        sumname = 'portion%s_volsum' % portionnum
        rdf = pd.concat([testdf, testdf.sum(axis=1)], axis=1, ignore_index=True)
        rdf.columns = reagentnamelist + [sumname]

        # Select only those which meet the volume criteria specified by the
        # portion of the experiment
        within_limits = ((rdf[sumname] >= int(vollimits[portionnum][0]))
                         & (rdf[sumname] <= int(vollimits[portionnum][1])))
        finalrdf = rdf.loc[within_limits].drop(labels=sumname, axis=1)
        fullvollist.append(finalrdf.values.tolist())

    # permute all combinations of the portions that meet the requirements set by
    # the user, flattening each combination into a single row for pandas
    finalfulllist = [
        list(itertools.chain.from_iterable(multivol))
        for multivol in itertools.product(*fullvollist)
    ]
    if not finalfulllist and fullreagentnamelist:
        # Without this guard the column assignment below fails with an opaque
        # pandas "Length mismatch" ValueError.
        raise ValueError(
            'No volume combination satisfies the volume limits %r for reagents %r'
            % (vollimits, fullreagentnamelist))

    prdf = pd.DataFrame(finalfulllist)
    prdf = prdf.drop_duplicates()
    prdf.columns = fullreagentnamelist
    # NOTE: the original called testdf.astype(int) / prdf.astype(float) without
    # using the result; astype returns a new object, so those calls had no
    # effect and have been removed (dtypes are unchanged).

    finalmmoldf = pd.DataFrame()
    for reagentname in fullreagentnamelist:
        # 'Reagent2 (ul)' to give '2' (kept as a string, as before)
        reagentnum = reagentname.split('t')[1].split(' ')[0]
        mmoldf = calcs.mmolextension(prdf[reagentname], rdict, experiment,
                                     reagentnum)
        finalmmoldf = pd.concat([finalmmoldf, mmoldf], axis=1)
    return prdf, finalmmoldf
def wolfram_sampling(expoverview, rdict, old_reagents, vollimits, rxndict, vardict, wellnum, userlimits, experiment):
    """Sample from the convex hull defined in species concentration space with uniform probability

    Any portions defined in the experiment overview beyond the first will be
    sampled by ``default_sampling``.

    :param expoverview: sequence of portions, each an iterable of reagent
        identifiers; only the first portion goes through the Wolfram sampler
    :param rdict: reagent lookup keyed by the string form of the reagent
        identifier; NOTE it is mutated (``rdict['6'] = rdict['7']``) when the
        reagent 6/7 hotfix below applies
    :param old_reagents: reagents used to build ``old_reagent_vectors``; only
        read when ``rxndict['multi_stock_sampling']`` is truthy
    :param vollimits: per-portion volume limits; ``[portion][1]`` is used as the
        maximum volume
    :param rxndict: run configuration; reads 'max_conc' (default 15),
        'multi_stock_sampling', 'ExpWorkflowVer' and 'lab'
    :param vardict: not used by this function; kept for interface compatibility
    :param wellnum: number of experiments to sample (converted with ``int``)
    :param userlimits: passed through to ``default_sampling``
    :param experiment: passed through to ``calcs.mmolextension`` and
        ``default_sampling``
    :return: (experiment volume df, experiment mmol df, version number of this sampler)
    :raises SystemExit: with status 1 when the sampler output cannot be turned
        into a dataframe
    """
    experiment_mmol_df = pd.DataFrame()
    experiment_df = pd.DataFrame()

    version = 1.2  # original "expert sampling"
    if len(expoverview) > 1:
        modlog.warning('only first portion will use mathematica sampler')

    portionnum = 0
    portion = expoverview[portionnum]
    volmax = vollimits[portionnum][1]
    maxconc = rxndict.get('max_conc', 15)

    portion_reagents = [rdict[str(i)] for i in portion]
    portion_species_names = get_unique_chemical_names(portion_reagents)
    reagent_vectors = build_reagent_vectors(portion_reagents, portion_species_names)

    if rxndict.get('multi_stock_sampling'):
        old_reagent_species_names = get_unique_chemical_names(old_reagents)
        # TODO: move to validation -- old and new reagents must be made out of
        # the same chemicals (old_reagent_species_names == portion_species_names);
        # this is currently not enforced.
        old_reagent_vectors = build_reagent_vectors(old_reagents,
                                                    old_reagent_species_names)
    else:
        old_reagent_vectors = None

    ws = WolframSampler()
    try:
        experiments = ws.randomlySample(reagent_vectors,
                                        old_reagent_vectors,
                                        int(wellnum),
                                        float(maxconc),
                                        float(volmax))
    finally:
        # Always release the sampler, even when sampling raises.
        ws.terminate()

    # TODO: randomlySample returns bogus entries (doesn't error) if the run is
    # not properly constructed.  Validation should be done prior to feeding into
    # mathematica.
    try:
        portion_df = pd.DataFrame.from_dict(experiments)
    except ValueError:
        modlog.error('The .xlsx specification cannot be used to generate samples!')
        modlog.error('Please ensure the run is correctly specified. See FAQs for suggestions.')
        print('User information and FAQs can be found at: https://docs.google.com/document/d/1RQJvAlDVIfu19Tea23dLUSymLabGfwJtDnZwANtU05s/edit#bookmark=id.8sg0qwagd7yw')
        import sys
        # Non-zero status: this is a failure path, not a clean exit.
        sys.exit(1)

    # todo How long can this reagent 6/7 hotfix remain like this? Answer: Forever
    if (rxndict['ExpWorkflowVer'] <= 1.1
            and rxndict['lab'] in ['LBL', 'HC']
            and 'Reagent7 (ul)' in portion_df.columns):
        # Split the Reagent7 volume into two halves: floor goes to Reagent6,
        # ceil stays in Reagent7.  Reagent6 must be computed first, while
        # Reagent7 still holds the full volume.
        portion_df['Reagent6 (ul)'] = np.floor(
            portion_df['Reagent7 (ul)'] / 2).astype(int)
        portion_df['Reagent7 (ul)'] = np.ceil(
            portion_df['Reagent7 (ul)'] / 2).astype(int)
        rdict['6'] = rdict['7']

    portion_mmol_df = pd.DataFrame()
    for col in portion_df.columns:
        # 'Reagent2 (ul)' to give 2
        reagent = int(col.split('t')[1].split('(')[0])
        mmol_df = calcs.mmolextension(portion_df[col], rdict, experiment, reagent)
        portion_mmol_df = pd.concat([portion_mmol_df, mmol_df], axis=1)

    experiment_mmol_df = pd.concat([experiment_mmol_df, portion_mmol_df], axis=1)
    experiment_df = pd.concat([experiment_df, portion_df], axis=1)

    portionnum += 1
    if portionnum < len(expoverview):
        modlog.warning(
            "Using default sampler for portion 2, mathematica sampling not supported for greater than first portion"
        )
        # The default sampler's version number is discarded so that the
        # mathematica version is the one reported.
        prdf, prmmoldf, _ = default_sampling(
            expoverview, rdict, vollimits, rxndict, wellnum, userlimits,
            experiment, portion_start_idx=portionnum)
        experiment_mmol_df = pd.concat([experiment_mmol_df, prmmoldf], axis=1)
        experiment_df = pd.concat([experiment_df, prdf], axis=1)

    return experiment_df, experiment_mmol_df, version
def default_sampling(expoverview, rdict, vollimits, rxndict, wellnum, userlimits, experiment, portion_start_idx=0):
    """Ian's original sampling implementation.

    Performs samplings within portions of the expoverview, starting from
    portion_start_idx.  Within a portion the reagents are drawn one after the
    other: the first reagent is bounded by ``calcvollimit``, every later reagent
    by ``calcvollimitdf`` (which is given the volumes drawn so far and the mmol
    dataframe of the previously drawn reagent), and the last reagent is
    additionally constrained so the portion reaches its minimum fill volume.

    :param expoverview: sequence of portions, each a sequence of reagent
        identifiers
    :param rdict: passed through to the volume-limit helpers and
        ``calcs.mmolextension``
    :param vollimits: per-portion volume limits, indexed like ``expoverview``;
        element ``[0]`` is the minimum and ``[1]`` the maximum total volume
    :param rxndict: passed through to ``calcvollimitdf``
    :param wellnum: passed through to the volume-limit and dataframe-building
        helpers
    :param userlimits: passed through to the volume-limit helpers
    :param experiment: passed through to the volume-limit helpers and
        ``calcs.mmolextension``
    :param portion_start_idx: index of the first portion to sample
    :return: (experiment volume df, experiment mmol df, version number of this sampler)
    """
    version = 2.7  # random sampling >3 chemicals (non-zero), maintains random sampling on secondary portions
    prdf = pd.DataFrame()
    prmmoldf = pd.DataFrame()

    for portionnum in range(portion_start_idx, len(expoverview)):
        portion = expoverview[portionnum]
        reagenttotal = len(portion)
        # Maximum volume available to this portion
        volmax = vollimits[portionnum][1]

        # unoptimized code that ensures that the previous reagents are considered
        # and that the final reagent accurately fills to the minimum volume set
        # by the user's "fill to" requirement
        finalrdf = pd.DataFrame()
        finalmmoldf = pd.DataFrame()
        for reagentcount, reagent in enumerate(portion, start=1):
            reagentname = 'Reagent%s (ul)' % reagent
            if reagentcount == 1:
                if reagenttotal == 1:
                    # Single-reagent portion: it alone must cover the whole
                    # [min, max] window.
                    volmin = vollimits[portionnum][0]
                    volmax = vollimits[portionnum][1] + 0.00001
                else:
                    volmin = 0
                # since all of the volume limits for the first draw are the same
                # these can be treated as a bounded search sequence
                rvolmax, rvolmin = calcvollimit(userlimits, rdict, volmax, volmin,
                                                experiment, portion, reagent,
                                                wellnum)
                # dataframe of volumes of this reagent for each experiment
                rdf = initialrdf(rvolmax, rvolmin, reagent, wellnum)
            else:
                # The constraints on later draws depend on the earlier ones, so
                # each experiment gets its own range (constrained by volume,
                # reagent-chemical concentrations and user constraints) and the
                # remaining reagents are sampled independently.
                rvolmaxdf, rvolmindf = calcvollimitdf(
                    finalrdf, mmoldf, userlimits, rdict, volmax, volmin,
                    experiment, portion, reagent, wellnum, rxndict)
                if reagentcount < reagenttotal:
                    rdf = rdfbuilder(rvolmaxdf, rvolmindf, reagent, wellnum)
                elif vollimits[portionnum][0] == vollimits[portionnum][1]:
                    # Final reagent with a fixed fill volume: take the maximum
                    # directly instead of sampling.
                    rdf = pd.DataFrame(rvolmaxdf, columns=[reagentname])
                else:
                    # Final reagent: ensure the lower bound meets the user's
                    # minimum total fill volume before sampling.
                    rvolmindf = ensuremin(rvolmindf, finalrdf,
                                          vollimits[portionnum][0])
                    rdf = rdfbuilder(rvolmaxdf, rvolmindf, reagent, wellnum)
            # reagentcount always lies in 1..reagenttotal, so the branches above
            # are exhaustive; the original "Fatal error" fallback could never run.

            # mmol dataframe for this reagent; also fed to calcvollimitdf for
            # the next reagent of the portion
            mmoldf = calcs.mmolextension(rdf[reagentname], rdict, experiment,
                                         reagent)
            finalrdf = pd.concat([finalrdf, rdf], axis=1)
            finalmmoldf = pd.concat([finalmmoldf, mmoldf], axis=1)

        prdf = pd.concat([prdf, finalrdf], axis=1)
        prmmoldf = pd.concat([prmmoldf, finalmmoldf], axis=1)

    return prdf, prmmoldf, version