def get_reults(self, bellwethers=None, n_reps=12):
    """Run every transfer learner on each project found under ``./data``.

    For each project, each learner in (Pooyan, Baseline, Waterloo) is called
    ``n_reps`` times on every ordered pair of distinct files.  One row per
    learner (upper-cased label first, then the learner outputs) is collected
    and the rows are handed to ``rdivDemo``.

    Args:
        bellwethers: Mapping from project name to the source names that the
            "baseline" learner is allowed to transfer from.  When ``None``,
            the baseline learner is skipped because it has no sources to
            use.
        n_reps: Number of times each learner is called per
            (source, target) pair.

    Raises:
        KeyError: If ``bellwethers`` is given but lacks an entry for a
            project that the baseline learner is evaluated on.
    """
    data_path = os.path.realpath("./data")
    for project in get_all_projects(data_path):
        print(project.name.upper())
        files = project.files()
        rows = []
        for transfer in (Pooyan, Baseline, Waterloo):
            # Learners are told apart by their docstring.
            is_baseline = transfer.__doc__ == "baseline"
            if is_baseline and bellwethers is None:
                # Nothing to restrict the baseline to; subscripting None
                # would fail only after the earlier learners already ran.
                continue
            row = [transfer.__doc__.upper()]
            for source_name, source_conf in files.items():
                for target_name, target_conf in files.items():
                    if source_name == target_name:
                        continue
                    # The baseline only transfers from bellwether sources;
                    # the other learners use every source.
                    if (is_baseline
                            and source_name not in bellwethers[project.name]):
                        continue
                    row.extend(
                        transfer.learner(source_conf, target_conf)
                        for _ in range(n_reps))
            rows.append(row)
        rdivDemo(rows)
        print("")
def main(n_reps=30):
    """Evaluate source-to-target transfer for every file pair of each project.

    For every ordered pair of distinct files in a project, the following is
    repeated ``n_reps`` times: train a prediction model on the source, fit a
    transfer model on 10 randomly sampled configurations shared by source
    and target, predict the remaining target rows, and record the rank
    difference between actual and predicted values.  The median rank
    difference (truncated to ``int``) is stored per pair.

    The last column of each frame is treated as the dependent variable.

    Args:
        n_reps: Number of repetitions per (source, target) pair.

    Returns:
        dict: Maps each project name to a ``pandas.DataFrame`` whose columns
        are source names and whose index holds target names.
    """
    data_path = os.path.realpath("./data")
    results = {}
    for project in get_all_projects(data_path):
        files = project.files()
        table = {}
        for source_name, source_conf in files.items():
            table[source_name] = {}
            for target_name, target_conf in files.items():
                if source_name == target_name:
                    continue
                r_diff = []
                for _ in range(n_reps):
                    # Construct a prediction model using source.
                    predict_model = train_prediction_model(source_conf, T=5)

                    # Find the configurations common to source and target
                    # and pick 10 of them at random to train the transfer
                    # model.
                    common = pd.merge(source_conf, target_conf, how="inner")
                    some = common.sample(n=10)

                    # Dependent variables used to fit the transfer model.
                    p_src = some[source_conf.columns[-1]]
                    p_tgt = some[target_conf.columns[-1]]
                    transfer_model = train_transfer_model(p_src=p_src,
                                                          p_tgt=p_tgt)

                    # Remove the rows used to train the transfer model.  The
                    # result gets its own name: rebinding ``target_conf``
                    # would shrink the target a little more on every
                    # repetition instead of starting from the full frame.
                    # NOTE(review): ``some.index`` carries the labels of the
                    # merged frame, not of ``target_conf`` -- confirm that
                    # these labels actually identify the sampled target rows.
                    holdout = target_conf.drop(some.index, errors="ignore")

                    # Perform transfer.
                    target_indep = holdout[holdout.columns[:-1]]
                    target_actual = holdout[holdout.columns[-1]]
                    predicted_raw = predict_model.predict(
                        target_indep).reshape(-1, 1)
                    target_predicted = transfer_model.predict(
                        predicted_raw).reshape(1, -1)[0]

                    # Get rank difference.
                    r_diff.append(
                        rank_diff(actual=target_actual,
                                  predicted=target_predicted))

                table[source_name][target_name] = int(np.median(r_diff))

        results[project.name] = pd.DataFrame(table)

    return results
def compare(self, data_pairs, n_reps=1):
    """Compare transfer from the "best" and "worst" source of each project.

    For every project under ``./data``, ``Pooyan.learner`` is called with the
    best source and with the worst source against each remaining file, and
    the two labelled rows (``"best"`` and ``"worst"``) are handed to
    ``rdivDemo``.

    Args:
        data_pairs: Mapping from project name to a ``(best, worst)`` pair of
            file names; both must be keys of the project's files.
        n_reps: Number of times the learner is called per source/target
            combination.  Defaults to 1.
    """
    data_path = os.path.realpath("./data")
    for project in get_all_projects(data_path):
        print(project.name.upper())
        files = project.files()
        pair = data_pairs[project.name]
        best, worst = pair
        # Every file that is neither the best nor the worst source is a
        # target.
        rest = [dframe for fname, dframe in files.items()
                if fname not in pair]
        best_results = ["best"]
        worst_results = ["worst"]
        for tgt in rest:
            best_results.extend(
                Pooyan.learner(files[best], tgt) for _ in range(n_reps))
            worst_results.extend(
                Pooyan.learner(files[worst], tgt) for _ in range(n_reps))
        rdivDemo([best_results, worst_results])
def main(n_reps=30):
    """Evaluate Gaussian-process transfer for every file pair of each project.

    For every ordered pair of distinct files in a project, a model is built
    with ``train_gaussproc_model(source, target)`` ``n_reps`` times; each
    time the target's independent columns are predicted and the rank
    difference against the actual values is recorded.  The median rank
    difference (truncated to ``int``) is stored per pair.

    The last column of each frame is treated as the dependent variable.

    Args:
        n_reps: Number of repetitions per (source, target) pair.

    Returns:
        dict: Maps each project name to a ``pandas.DataFrame`` whose columns
        are source names and whose index holds target names.
    """
    data_path = os.path.realpath("./data")
    results = {}
    for project in get_all_projects(data_path):
        files = project.files()
        table = {}
        for source_name, source_conf in files.items():
            table[source_name] = {}
            for target_name, target_conf in files.items():
                if source_name == target_name:
                    continue
                r_diff = []
                for _ in range(n_reps):
                    # Construct a gaussian process model using source.
                    predict_model = train_gaussproc_model(
                        source_conf, target_conf)

                    # Perform transfer.
                    target_indep = target_conf[target_conf.columns[:-1]]
                    target_actual = target_conf[target_conf.columns[-1]]
                    # NOTE(review): predictions are passed on as a column
                    # vector of shape (n, 1) -- confirm rank_diff expects
                    # that rather than a flat array.
                    target_predicted = predict_model.predict(
                        target_indep).reshape(-1, 1)

                    # Get rank difference.
                    r_diff.append(
                        rank_diff(actual=target_actual,
                                  predicted=target_predicted))

                table[source_name][target_name] = int(np.median(r_diff))

        results[project.name] = pd.DataFrame(table)

    return results