def best_interpolated_multi_search_result(tissue,
                                          path,
                                          prefix,
                                          n_steps,
                                          lambd=0,
                                          obj="avg",
                                          max_dosage=8000,
                                          verification=False):
    """
    Retrieve the best single-step treatment from the multi-cell experiments
    and interpolate it over multiple steps.

    Every stored row whose ``threshold`` equals ``max_dosage`` is scored as
    ``aggregate(proliferations ** n_steps) + lambd * total_concentration * n_steps``
    and the row with the smallest score is selected. On ties the later row
    wins.

    Args:
        tissue: Tissue name; forwarded to ``load_data`` and ``retrieve_lines``.
        path: Location of the stored search results (forwarded to ``load_data``).
        prefix: File prefix of the stored search results (forwarded to ``load_data``).
        n_steps: Number of steps the single-step treatment is repeated for.
        lambd: Weight of the total-concentration penalty in the objective.
        obj: ``"avg"`` to aggregate the per-cell-line proliferations with
            ``np.average`` or ``"worst"`` to aggregate them with ``np.max``.
        max_dosage: Only rows with exactly this ``threshold`` are considered.
        verification: If true, the selected result is passed to
            ``verify_sequential_search_result``.

    Returns:
        Tuple ``(treatments, concentration, rel_prolif, objective)`` where
        ``treatments`` holds ``n_steps`` copies of the selected treatment.

    Raises:
        ValueError: If ``obj`` is unknown or no row yields a usable objective.
    """
    if obj == "avg":
        aggregate = np.average
    elif obj == "worst":
        aggregate = np.max
    else:
        raise ValueError("Specified objective is unknown.")

    data = load_data(path, tissue, prefix=prefix, format="csv")
    # The expected number of proliferation values does not change per row.
    n_lines = len(retrieve_lines(tissue))

    best_label = None
    objective = np.inf
    for i in data[data['threshold'] == max_dosage].index:
        assert data["threshold"][
            i] == max_dosage, "Retrieval from pandas frame did not work as expected."
        temp_prolif_list = recover_numbers_from_list(
            data["relative_proliferation"][i])
        assert len(
            temp_prolif_list) == n_lines, "Number of proliferation values is off."

        # Repeating the same treatment n_steps times compounds the
        # single-step relative proliferation of every cell line.
        stepped_prolif_list = temp_prolif_list**n_steps
        temp_prolif = aggregate(stepped_prolif_list)
        temp_objective = temp_prolif + lambd * data["total_concentration"][
            i] * n_steps

        # `<=` (not `<`) so that the last of several equally good rows wins.
        if temp_objective <= objective:
            objective = temp_objective
            rel_prolif = temp_prolif
            prolif_list = stepped_prolif_list
            concentration = data["total_concentration"][i] * n_steps
            best_label = i

    if best_label is None:
        raise ValueError(
            f"No search result with threshold {max_dosage} yields a valid objective."
        )

    # `best_label` is an index label, so the row must be fetched with `.loc`;
    # `.iloc` would only be correct for a default 0..n-1 index.
    treatments = [row_to_treatment(data.loc[best_label]) for _ in range(n_steps)]

    if verification:
        print("- Verifying search")
        verify_sequential_search_result(retrieve_lines(tissue), n_steps,
                                        treatments, concentration, prolif_list,
                                        objective, obj, lambd)

    # treatment, concentration, relative proliferation, objective value
    return treatments, concentration, rel_prolif, objective
def best_multi_search_result(tissue,
                             path,
                             prefix,
                             lambd=0,
                             obj="avg",
                             max_dosage=8000,
                             verification=False):
    """
    Retrieve the best treatment from the stored multi-cell search results.

    Every stored row whose ``threshold`` equals ``max_dosage`` is scored as
    ``aggregate(proliferations) + lambd * total_concentration`` and the row
    with the smallest score is selected. On ties the later row wins.

    Args:
        tissue: Tissue name; forwarded to ``load_data`` and ``retrieve_lines``.
        path: Location of the stored search results (forwarded to ``load_data``).
        prefix: File prefix of the stored search results (forwarded to ``load_data``).
        lambd: Weight of the total-concentration penalty in the objective.
        obj: ``"avg"`` to aggregate the per-cell-line proliferations with
            ``np.average`` or ``"worst"`` to aggregate them with ``np.max``.
        max_dosage: Only rows with exactly this ``threshold`` are considered.
        verification: If true, the selected result is passed to
            ``verify_search_result``.

    Returns:
        Tuple ``(treatment, concentration, rel_prolif, objective)``.

    Raises:
        ValueError: If ``obj`` is unknown or no row yields a usable objective.
    """
    if obj == "avg":
        aggregate = np.average
    elif obj == "worst":
        aggregate = np.max
    else:
        raise ValueError("Specified objective is unknown.")

    data = load_data(path, tissue, prefix=prefix, format="csv")
    # The expected number of proliferation values does not change per row.
    n_lines = len(retrieve_lines(tissue))

    best_label = None
    objective = np.inf
    for i in data[data['threshold'] == max_dosage].index:
        assert data["threshold"][
            i] == max_dosage, "Retrieval from pandas frame did not work as expected."
        temp_prolif_list = recover_numbers_from_list(
            data["relative_proliferation"][i])
        assert len(
            temp_prolif_list) == n_lines, "Number of proliferation values is off."

        temp_prolif = aggregate(temp_prolif_list)
        temp_objective = temp_prolif + lambd * data["total_concentration"][i]

        # `<=` (not `<`) so that the last of several equally good rows wins.
        if temp_objective <= objective:
            objective = temp_objective
            rel_prolif = temp_prolif
            prolif_list = temp_prolif_list
            concentration = data["total_concentration"][i]
            best_label = i

    if best_label is None:
        raise ValueError(
            f"No search result with threshold {max_dosage} yields a valid objective."
        )

    # `best_label` is an index label, so the row must be fetched with `.loc`;
    # `.iloc` would only be correct for a default 0..n-1 index.
    treatment = row_to_treatment(data.loc[best_label])

    if verification:
        print("- Verifying search")
        verify_search_result(retrieve_lines(tissue), treatment, concentration,
                             prolif_list, objective, obj, lambd)

    # treatment, concentration, relative proliferation, objective value
    return treatment, concentration, rel_prolif, objective
def cma_experiment(tissue, n_steps, domain, objective, prefix, seed):
    """
    Run CMA-ES once per dosage threshold in ``THRES`` and store the results.

    For every threshold an ``Evaluator`` is created for the cell lines of
    ``tissue``, ``cma_es`` is run on it, and the resulting treatment (repeated
    ``n_steps`` times) and relative proliferations are appended to a result
    dictionary, which is finally written with ``store`` as CSV.

    Args:
        tissue: Tissue name; used to look up the cell lines and to name the output.
        n_steps: Number of steps; the optimised treatment is repeated this often.
        domain: Optimisation domain handed to the evaluator and to ``cma_es``.
        objective: Objective setting placed in the evaluator configuration.
        prefix: Prefix handed to ``store`` for the output.
        seed: Random seed handed to ``cma_es``.
    """
    print(tissue)
    print("-----------------------")
    cell_lines = retrieve_lines(tissue)

    res_dict = initialize_sequential_result_dictionary(n_steps)
    res_dict["threshold"] = []

    for T in THRES:
        conf = {
            "n_steps": n_steps,
            "cell_lines": cell_lines,
            "objective": objective,
            "max_dosage": T,
            "domain": domain,
            "scale": SCALE
        }
        evaluator = Evaluator(
            conf, n_envs=N_ENVS, store=STORE, repeated=True
        )  # NOTE: repeated causes the evaluator to use the same treatment at every step
        try:
            mu, obj, rel_prolif = cma_es(evaluator,
                                         domain,
                                         MAX_ITER,
                                         verbose=True,
                                         seed=seed)
            assert len(rel_prolif) == len(
                cell_lines
            ), "Number of proliferations differs from number of cell lines."
            # The single optimised treatment is repeated for all n_steps steps.
            update_sequential_result_dictionary(
                res_dict, [np.concatenate([mu] * n_steps)], [rel_prolif], T,
                SCALE, n_steps)
            res_dict["threshold"].append(T)
        finally:
            # Always release the evaluator, even if the optimisation or the
            # consistency check above fails.
            evaluator.terminate()

    store(res_dict, PATH, tissue, prefix, format="csv")
def get_data_dual(lambdas):
    """
    Collect the best dual-drug result for every penalty weight in ``lambdas``.

    The combined dual frame is built once from the cell lines of ``TISSUE``
    and reused for every call to ``best_dual_treatment``. Progress is printed
    after every fifth lambda.

    Returns:
        Three lists aligned with ``lambdas``: proliferations, objectives and
        concentrations.
    """
    comb_data = build_combined_dual_frame(retrieve_lines(TISSUE))

    proliferations = []
    objectives = []
    concentrations = []

    for n_done, lambd in enumerate(lambdas, start=1):
        # The returned treatment itself is not needed here.
        _, dosage, prolif, objective_value = best_dual_treatment(
            retrieve_lines(TISSUE),
            lambd=lambd,
            obj=OBJECTIVE,
            max_dosage=THRESHOLD,
            verification=VERIFICATION,
            comb_data=comb_data)

        proliferations.append(prolif)
        objectives.append(objective_value)
        concentrations.append(dosage)

        if n_done % 5 == 0:
            print(" ...%2d out of %d lambdas loaded..." %
                  (n_done, len(lambdas)))

    return proliferations, objectives, concentrations
def main():
    """
    Command-line entry point: create the baseline data for one tissue.

    Parses the required ``-t/--tissue`` option, looks up the tissue's cell
    lines and runs ``single_baseline`` on them; ``two_baseline`` is run in
    addition when ``DUAL_BASELINE`` is set.
    """
    parser = argparse.ArgumentParser(
        description='Create baselines for tissue.')
    # Adjacent string literals instead of a backslash continuation inside the
    # literal, which would embed stray whitespace in the help text.
    parser.add_argument(
        "-t",
        '--tissue',
        metavar='tissue',
        type=str,
        required=True,
        help=('the name of the relevant tissue. '
              'Possible tissues are "breast", "intestine", "lung", '
              '"pancreas", "skin" and "initial".'))
    args = parser.parse_args()

    cell_lines = retrieve_lines(args.tissue)
    single_baseline(cell_lines)
    if DUAL_BASELINE:
        two_baseline(cell_lines)

    print("----------------------------------------")
    print("Completed experimentation and stored baseline data successfully.")
dual_all_prolifs, label="PD0325901+PLX-4720") ax.set_xlabel("Total dosage ($\\mu$M)") ax.set_ylabel("Max relative proliferation") ax.set_xticks([0, 1, 2, 3, 4, 5, 6, 7, 8]) ax.legend(prop={'size': 'small'}) ax.grid() # ------------------------------------------------------------ # Executing code # ------------------------------------------------------------ # retrieve relevant cell-lines cell_lines = retrieve_lines(TISSUE) # create a plot for each of them: for line in cell_lines: print(f"\n====== cell-line: {line} ======\n") t0 = time.time() print(" *** Search results ***") lambdas, search_prolifs, search_objectives, search_concentration, search_treatments = get_data_search( line) print(" >>> Total time: ", round(time.time() - t0, 2), " seconds <<<\n") t0 = time.time() print(" *** Single drug results ***") single_prolifs, single_objectives, single_concentration = get_data_single( line, lambdas)
t0 = time.time() print("*** Single drug results ***") single_prolifs, single_objectives, single_concentration = get_data_single( lambdas) print(">>> Total time: ", round(time.time() - t0, 2), " seconds <<<\n") t0 = time.time() print("*** Dual drug results ***") dual_prolifs, dual_objectives, dual_concentration = get_data_dual(lambdas) print(">>> Total time: ", round(time.time() - t0, 2), " seconds <<<\n") t0 = time.time() print("*** All single drug results ***") best_drugs, single_all_concentrations, single_all_prolifs = best_single_treatment_by_dosage( retrieve_lines(TISSUE), obj=OBJECTIVE, path="./artifacts/baselines/") print(">>> Total time: ", round(time.time() - t0, 2), " seconds <<<\n") t0 = time.time() print("*** All dual drug results ***") best_ratios, dual_all_concentrations, dual_all_prolifs = best_dual_treatment_by_dosage( retrieve_lines(TISSUE), obj=OBJECTIVE, path="./artifacts/baselines/") print(">>> Total time: ", round(time.time() - t0, 2), " seconds <<<\n") def drug_usage_plot(ax, search_treatments): cmap = matplotlib.cm.get_cmap("tab10") # create new dataframe from combined dictionary drug_usages = { } # drug name -> [drug dosage for lambda in LAMBDA_EXPONENTS]
def main():
    """
    Command-line entry point: run the CMA-ES optimisation for one tissue.

    Options:
        -t/--tissue: tissue whose cell lines are optimised (required).
        -l/--lambd: exponent ``l`` of the penalty weight; ``10**l`` is used.
            When omitted, the weight is 0 and only relative proliferation is
            optimised.
        -d/--domain: optimisation domain, ``"simplex"`` or ``"cube"`` (required).
        -r/--random_seed: seed for the random number generator (required).

    ``cma_experiment`` is called once per cell line of the tissue. The output
    directory ``PATH`` is created if it does not exist yet.
    """
    # Sentinel default that marks "no lambda given on the command line".
    # Kept numeric so that existing invocations behave exactly as before.
    lambd_unset = 12345

    parser = argparse.ArgumentParser(
        description='Create baselines for tissue.')
    # Help texts use adjacent string literals instead of a backslash
    # continuation inside the literal, which would embed stray whitespace.
    parser.add_argument(
        "-t",
        '--tissue',
        metavar='tissue',
        type=str,
        required=True,
        help=('the name of the relevant tissue. '
              'Possible tissues are "breast", "intestine", "lung", '
              '"pancreas", "skin" and "initial".'))
    parser.add_argument(
        "-l",
        '--lambd',
        metavar='lambd',
        type=float,
        required=False,
        default=lambd_unset,
        help=('Specifies the exponent of the weighting parameter for the '
              'linear penalty function. '
              'If no value is specified the algorithm optimizes only for '
              'relative proliferation. '
              'If a value is specified lambda 10^l is used.'))
    parser.add_argument(
        "-d",
        '--domain',
        metavar='domain',
        type=str,
        required=True,
        help=('the domain for the optimization process. '
              'Possible domains are "simplex" and "cube".'))
    parser.add_argument("-r",
                        '--random_seed',
                        metavar='random_seed',
                        type=int,
                        required=True,
                        help='Seed for random number generator.')
    args = parser.parse_args()

    # Create the output directory only once the arguments are known to be
    # valid; makedirs also copes with missing parents and avoids the
    # check-then-create race of isdir() + mkdir().
    os.makedirs(PATH, exist_ok=True)

    seed = args.random_seed
    cell_lines = retrieve_lines(args.tissue)
    lambd = args.lambd
    domain = retrieve_domain(args.domain, seed=seed)

    if lambd == lambd_unset:
        lambd = 0
        prefix = f"{args.domain}_prolif_cma_es"
    else:
        # The exponent becomes part of the file prefix, with "." replaced so
        # the prefix contains no dot.
        exponent_tag = str(lambd).replace(".", "_")
        prefix = f"{args.domain}_{exponent_tag}_cma_es"
        lambd = 10**lambd

    print("Prefix:", prefix)
    print("Lambda:", lambd)
    print("")
    print("Running optimization...")
    for cell_line in cell_lines:
        cma_experiment(cell_line, domain, lambd, prefix, seed)
    print("Completed optimization.")
    print("\n----------------------------------------")
    print("Stored results successfully.")
def setUp(self):
    """Initialise the fixture: tissue, its cell lines, lambd, rows and n_steps."""
    tissue = "skin"
    self.tissue = tissue
    self.cell_lines = retrieve_lines(tissue)
    self.lambd = 10**-4.5
    self.rows, self.n_steps = [10, 47, 430, 1150], 2