예제 #1
0
def volume_to_mmol_wrapper(vol_df, rdict, experiment):
    """Translate every reagent volume column of ``vol_df`` into mmol columns.

    Each column label is handed to ``get_reagent_number_as_string``
    (e.g. 'Reagent2 (ul)' gives '2'); the result is cast to ``int`` and
    passed, together with that column's values, to ``calcs.mmolextension``.

    :param vol_df: DataFrame whose columns are reagent volume columns
    :param rdict: forwarded unchanged to ``calcs.mmolextension``
    :param experiment: forwarded unchanged to ``calcs.mmolextension``
    :return: the per-column results joined side by side (``axis=1``); an
        empty DataFrame when ``vol_df`` has no columns
    """
    # The leading empty frame keeps the "no columns" case returning an empty
    # DataFrame instead of handing pd.concat an empty list.
    pieces = [pd.DataFrame()]
    for label in vol_df.columns:
        reagent_number = int(get_reagent_number_as_string(label))
        pieces.append(
            calcs.mmolextension(vol_df[label], rdict, experiment,
                                reagent_number))

    return pd.concat(pieces, axis=1)
예제 #2
0
def default_statedataframe(rxndict, expoverview, vollimits, rdict, experiment):
    """Enumerate every reagent volume combination that satisfies the volume limits.

    For each portion of ``expoverview`` every reagent is stepped from 0 up to
    the portion's maximum volume in increments of ``config.volspacing``.  Only
    combinations whose summed volume lies within the portion's
    ``[minimum, maximum]`` limits (both truncated with ``int``) are kept.  The
    surviving combinations of all portions are then crossed with each other,
    giving one row per complete experiment.

    :param rxndict: not referenced by this function (kept for interface
        compatibility)
    :param expoverview: iterable of portions, each an iterable of reagent
        identifiers
    :param vollimits: per-portion ``(minimum, maximum)`` total volume, indexed
        in the same order as ``expoverview``
    :param rdict: forwarded unchanged to ``calcs.mmolextension``
    :param experiment: forwarded unchanged to ``calcs.mmolextension``
    :return: ``(prdf, finalmmoldf)`` -- the de-duplicated volume DataFrame
        with one 'Reagent<id> (ul)' column per reagent, and the column-wise
        concatenation of the ``calcs.mmolextension`` result for each column
    :raises ValueError: if no volume combination satisfies the limits
    """
    column_names = []
    reagent_ids = []
    rows_per_portion = []

    for portionnum, portion in enumerate(expoverview):
        # Truncate the limits once; this also lets range() accept limits that
        # arrive as floats, matching the int() used for the sum filter.
        min_total = int(vollimits[portionnum][0])
        max_total = int(vollimits[portionnum][1])

        reagents = list(portion)
        column_names.extend('Reagent%s (ul)' % reagent for reagent in reagents)
        # mmolextension receives the reagent identifier as a string here.
        reagent_ids.extend(str(reagent) for reagent in reagents)

        # Every reagent of the portion shares the same candidate volumes:
        # 0 .. max_total (inclusive) in steps of config.volspacing.
        volume_steps = range(0, max_total + 1, config.volspacing)

        # Keep only the combinations whose total volume is within the limits.
        rows_per_portion.append([
            list(combo)
            for combo in itertools.product(volume_steps, repeat=len(reagents))
            if min_total <= sum(combo) <= max_total
        ])

    # Cross the per-portion combinations and flatten each pairing into one row.
    combined_rows = [
        list(itertools.chain.from_iterable(row_group))
        for row_group in itertools.product(*rows_per_portion)
    ]
    if not combined_rows:
        raise ValueError(
            'No reagent volume combination satisfies the requested volume limits')

    # De-duplicate while the columns still carry unique positional labels,
    # then attach the reagent column names.
    prdf = pd.DataFrame(combined_rows).drop_duplicates()
    prdf.columns = column_names

    # The leading empty frame keeps the result an empty DataFrame when there
    # are no reagent columns at all.
    mmol_frames = [pd.DataFrame()]
    for column, reagentnum in zip(column_names, reagent_ids):
        mmol_frames.append(
            calcs.mmolextension(prdf[column], rdict, experiment, reagentnum))
    finalmmoldf = pd.concat(mmol_frames, axis=1)

    return prdf, finalmmoldf
예제 #3
0
def wolfram_sampling(expoverview, rdict, old_reagents, vollimits, rxndict,
                     vardict, wellnum, userlimits, experiment):
    """Sample from the convex hull defined in species concentration space with uniform probability

    Only the first portion of ``expoverview`` is sampled through
    ``WolframSampler``; any further portions are sampled by
    ``default_sampling`` and appended column-wise.

    Side effect: for ``rxndict['ExpWorkflowVer'] <= 1.1`` in labs 'LBL'/'HC',
    when a 'Reagent7 (ul)' column was sampled, its volume is split into a
    floor half ('Reagent6 (ul)') and a ceil half ('Reagent7 (ul)') and
    ``rdict['6']`` is overwritten with ``rdict['7']`` in the caller's dict.

    :param expoverview: sequence of portions, each an iterable of reagent
        identifiers (looked up in ``rdict`` via ``str(identifier)``)
    :param rdict: reagent lookup keyed by reagent number as a string
    :param old_reagents: reagents used to build the second set of reagent
        vectors; only read when ``rxndict['multi_stock_sampling']`` is truthy
    :param vollimits: per-portion ``(minimum, maximum)`` volume limits; the
        maximum of the first portion is passed to the sampler
    :param rxndict: run configuration; reads 'max_conc' (default 15),
        'multi_stock_sampling', 'ExpWorkflowVer' and 'lab'
    :param vardict: not referenced by this function
    :param wellnum: number of experiments to sample (cast to ``int``)
    :param userlimits: forwarded to ``default_sampling``
    :param experiment: forwarded to ``calcs.mmolextension`` and
        ``default_sampling``
    :return: (experiment volume df, experiment mmol df, version number of this sampler)
    :raises SystemExit: with status 1 when the sampler output cannot be turned
        into a DataFrame
    """
    experiment_mmol_df = pd.DataFrame()
    experiment_df = pd.DataFrame()

    version = 1.2  # original "expert sampling"

    if len(expoverview) > 1:
        modlog.warning('only first portion will use mathematica sampler')
    portionnum = 0
    portion = expoverview[portionnum]

    volmax = vollimits[portionnum][1]
    maxconc = rxndict.get('max_conc', 15)

    portion_reagents = [rdict[str(i)] for i in portion]
    portion_species_names = get_unique_chemical_names(portion_reagents)
    reagent_vectors = build_reagent_vectors(portion_reagents,
                                            portion_species_names)

    if rxndict.get('multi_stock_sampling'):
        old_reagent_species_names = get_unique_chemical_names(old_reagents)
        # TODO: move to validation -- old and new reagents must be made out of
        # the same chemicals (old_reagent_species_names == portion_species_names).
        old_reagent_vectors = build_reagent_vectors(old_reagents,
                                                    old_reagent_species_names)
    else:
        old_reagent_vectors = None

    ws = WolframSampler()
    try:
        experiments = ws.randomlySample(reagent_vectors, old_reagent_vectors,
                                        int(wellnum), float(maxconc),
                                        float(volmax))
    finally:
        # Always release the sampler, even when sampling raises.
        ws.terminate()

    # TODO: randomlySample returns bogus entries (doesn't error) if the run is
    # not properly constructed. Validation should be done prior to feeding
    # into mathematica.
    try:
        portion_df = pd.DataFrame.from_dict(experiments)
    except ValueError:
        modlog.error(
            'The .xlsx specification cannot be used to generate samples!')
        modlog.error(
            'Please ensure the run is correctly specified. See FAQs for suggestions.'
        )
        print(
            'User information and FAQs can be found at: https://docs.google.com/document/d/1RQJvAlDVIfu19Tea23dLUSymLabGfwJtDnZwANtU05s/edit#bookmark=id.8sg0qwagd7yw'
        )
        import sys
        # Non-zero status so that callers/scripts can detect the failure.
        sys.exit(1)

    # todo How long can this reagent 6/7 hotfix remain like this? Answer: Forever
    if rxndict['ExpWorkflowVer'] <= 1.1:
        if rxndict['lab'] in ['LBL', 'HC'] and ('Reagent7 (ul)'
                                                in portion_df.columns):
            # Split the sampled Reagent7 volume into two halves (floor/ceil so
            # that the halves still add up to the original volume).
            portion_df['Reagent6 (ul)'] = np.floor(
                portion_df['Reagent7 (ul)'] / 2).astype(int)
            portion_df['Reagent7 (ul)'] = np.ceil(portion_df['Reagent7 (ul)'] /
                                                  2).astype(int)
            rdict['6'] = rdict['7']

    # The leading empty frame keeps the result an empty DataFrame when the
    # sampler produced no columns.
    mmol_frames = [pd.DataFrame()]
    for col in portion_df.columns:
        reagent = int(
            col.split('t')[1].split('(')[0])  # 'Reagent2 (ul)' to give '2'
        mmol_frames.append(
            calcs.mmolextension(portion_df[col], rdict, experiment, reagent))
    portion_mmol_df = pd.concat(mmol_frames, axis=1)

    experiment_mmol_df = pd.concat([experiment_mmol_df, portion_mmol_df],
                                   axis=1)
    experiment_df = pd.concat([experiment_df, portion_df], axis=1)

    portionnum += 1
    if portionnum < len(expoverview):
        modlog.warning(
            "Using default sampler for portion 2, mathematica sampling not supported for greater than first portion"
        )
        # The default sampler's version number is discarded so that the
        # mathematica sampler version is the one reported.
        prdf, prmmoldf, _ = default_sampling(
            expoverview,
            rdict,
            vollimits,
            rxndict,
            wellnum,
            userlimits,
            experiment,
            portion_start_idx=portionnum)
        experiment_mmol_df = pd.concat([experiment_mmol_df, prmmoldf], axis=1)
        experiment_df = pd.concat([experiment_df, prdf], axis=1)

    return experiment_df, experiment_mmol_df, version
예제 #4
0
def default_sampling(expoverview,
                     rdict,
                     vollimits,
                     rxndict,
                     wellnum,
                     userlimits,
                     experiment,
                     portion_start_idx=0):
    """Ian's original sampling implementation.

    Performs samplings within portions of the expoverview, starting from
    portion_start_idx.  Within a portion the reagents are drawn in order:

    * the first reagent is bounded by ``calcvollimit`` and drawn with
      ``initialrdf``;
    * every later reagent is bounded by ``calcvollimitdf``, which receives the
      volumes drawn so far in this portion and the mmol frame of the
      previously drawn reagent;
    * the last reagent of a multi-reagent portion is set directly to the
      returned maximum volumes when the portion's minimum and maximum volume
      are equal; otherwise its minimum is adjusted by ``ensuremin`` before it
      is drawn with ``rdfbuilder``.

    :param expoverview: sequence of portions, each a sized iterable of reagent
        identifiers
    :param rdict: forwarded to the limit helpers and ``calcs.mmolextension``
    :param vollimits: per-portion ``(minimum, maximum)`` total volume
    :param rxndict: forwarded to ``calcvollimitdf``
    :param wellnum: forwarded to the limit and draw helpers
    :param userlimits: forwarded to the limit helpers
    :param experiment: forwarded to the limit helpers and
        ``calcs.mmolextension``
    :param portion_start_idx: index of the first portion to sample
    :return: (experiment volume df, experiment mmol df, version number of this sampler)
    """
    version = 2.7  # random sampling >3 chemicals (non-zero), maintains random sampling on secondary portions
    prdf = pd.DataFrame()
    prmmoldf = pd.DataFrame()

    for portionnum in range(portion_start_idx, len(expoverview)):
        portion = expoverview[portionnum]
        reagenttotal = len(portion)

        # Volume window the whole portion has to respect.
        finalvolmin = vollimits[portionnum][0]
        portion_volmax = vollimits[portionnum][1]
        volmax = portion_volmax

        mmoldf = pd.DataFrame()
        finalrdf = pd.DataFrame()
        finalmmoldf = pd.DataFrame()

        for position, reagent in enumerate(portion, start=1):
            column = 'Reagent%s (ul)' % reagent

            if position == 1:
                if reagenttotal == 1:
                    # A lone reagent has to satisfy the portion limits itself.
                    # NOTE(review): the 0.00001 offset presumably keeps the
                    # range non-degenerate when min == max -- confirm.
                    volmin = finalvolmin
                    volmax = portion_volmax + 0.00001
                else:
                    volmin = 0

                # All wells share the same limits for the first draw, so it
                # can be treated as a single bounded search.
                rvolmax, rvolmin = calcvollimit(userlimits, rdict, volmax,
                                                volmin, experiment, portion,
                                                reagent, wellnum)
                rdf = initialrdf(rvolmax, rvolmin, reagent, wellnum)
            else:
                # Later draws depend on what was drawn before, so the limits
                # are computed per experiment.
                rvolmaxdf, rvolmindf = calcvollimitdf(
                    finalrdf, mmoldf, userlimits, rdict, volmax, volmin,
                    experiment, portion, reagent, wellnum, rxndict)

                if position < reagenttotal:
                    rdf = rdfbuilder(rvolmaxdf, rvolmindf, reagent, wellnum)
                elif finalvolmin == portion_volmax:
                    # Fixed fill volume: the last reagent takes the maximum
                    # volume returned for each experiment.
                    rdf = pd.DataFrame(rvolmaxdf, columns=[column])
                else:
                    # Make sure the last reagent meets the user's lower
                    # "fill to" bound for the portion.
                    rvolmindf = ensuremin(rvolmindf, finalrdf, finalvolmin)
                    rdf = rdfbuilder(rvolmaxdf, rvolmindf, reagent, wellnum)

            mmoldf = calcs.mmolextension(rdf[column], rdict, experiment,
                                         reagent)
            finalrdf = pd.concat([finalrdf, rdf], axis=1)
            finalmmoldf = pd.concat([finalmmoldf, mmoldf], axis=1)

        prdf = pd.concat([prdf, finalrdf], axis=1)
        prmmoldf = pd.concat([prmmoldf, finalmmoldf], axis=1)

    return prdf, prmmoldf, version