def __init__(self, arg_str=None):
    OptimizedInterleave.__init__(self, arg_str)
    parser = argparse.ArgumentParser()
    parser.add_argument('--allowed_leavings',
                        choices=['prefix_constraint', 'prefix_constraint_va'],
                        default='prefix_constraint_va')
    parser.add_argument("--credit_va", action="store_true", default=False)
    parser.add_argument("--um_class", type=str,
                        default="environment.FederatedClickModel")
    parser.add_argument("--um_args", type=str, default="0.2 0.1")
    args = vars(parser.parse_known_args(split_arg_str(arg_str))[0])
    # resolve the configured leaving constraint to the method of the
    # same name on this instance
    self.allowed_leavings = getattr(self, args['allowed_leavings'])
    # optionally switch to the vertical-aware rank precomputation
    if args["credit_va"]:
        self.precompute_rank = self.precompute_rank_va
    # instantiate the user (click) model used for credit assignment
    self.um_class = get_class(args["um_class"])
    self.um = self.um_class(args["um_args"])
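The getattr call above dispatches a string option straight to the bound method of the same name. A self-contained sketch of that pattern; the Demo class and its two methods are illustrative, not part of the framework:

import argparse

class Demo:
    def __init__(self, arg_str):
        parser = argparse.ArgumentParser()
        parser.add_argument('--allowed_leavings',
                            choices=['prefix_constraint',
                                     'prefix_constraint_va'],
                            default='prefix_constraint_va')
        args = vars(parser.parse_known_args(arg_str.split())[0])
        # resolve the option string to the method of the same name
        self.allowed_leavings = getattr(self, args['allowed_leavings'])

    def prefix_constraint(self):
        return "plain prefix constraint"

    def prefix_constraint_va(self):
        return "vertical-aware prefix constraint"

print(Demo("--allowed_leavings prefix_constraint").allowed_leavings())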
args["output_dir"]) config_bk = os.path.join(args["output_dir"], "config_bk.yml") logging.info("Backing up configuration to: %s" % config_bk) config_bk_file = open(config_bk, "w") yaml.dump(args, config_bk_file, default_flow_style=False) config_bk_file.close() # initialize and run the experiment num_run times run_start_id = args["run_start_id"] num_runs = args["num_runs"] if args.get("num_random_draws") is not None: # Redefine num_runs, and use args["num_runs"] only when drawing # pair of rankers in the run() function above. num_runs = args["num_random_draws"] assert run_start_id == 0, "Conflicting options" experimenter = get_class(args["experimenter"]) # set the random seed random.seed(42) if "processes" in args and args["processes"] > 1: from multiprocessing import Pool pool = Pool(processes=args["processes"]) for run_id in range(run_start_id, run_start_id + num_runs): pool.apply_async(run, (run_id, experimenter, args,)) pool.close() pool.join() else: for run_id in range(run_start_id, run_start_id + num_runs): run(run_id, experimenter, args)
def __init__(self, args_str=None):
    # parse arguments
    parser = argparse.ArgumentParser(description="""
        Construct and run a learning experiment. Provide either the name
        of a config file from which the experiment configuration is read,
        or provide all arguments listed under Command line. If both are
        provided the config file is ignored.""",
        prog=self.__class__.__name__)

    # option 1: use a config file
    file_group = parser.add_argument_group("FILE")
    file_group.add_argument("-f", "--file", help="Filename of the config "
                            "file from which the experiment details "
                            "should be read.")

    # option 2: specify all experiment details as arguments
    detail_group = parser.add_argument_group("DETAILS")
    detail_group.add_argument("-i", "--training_queries",
        help="File from which to load the training queries (svmlight "
        "format).")
    detail_group.add_argument("-j", "--test_queries",
        help="File from which to load the test queries (svmlight "
        "format).")
    detail_group.add_argument("-c", "--feature_count", type=int,
        help="The number of features included in the data.")
    detail_group.add_argument("-r", "--num_runs", type=int,
        help="Number of runs (how many times to repeat the experiment).")
    detail_group.add_argument("-q", "--num_queries", type=int,
        help="Number of queries in each run.")
    detail_group.add_argument("-u", "--user_model",
        help="Class implementing a user model.")
    detail_group.add_argument("-v", "--user_model_args",
        help="Arguments for initializing the user model.")
    # the retrieval system maintains ranking functions, accepts queries
    # and generates result lists, and in return receives user clicks to
    # learn from
    detail_group.add_argument("-s", "--system",
        help="Which system to use (e.g., pairwise, listwise).")
    detail_group.add_argument("-a", "--system_args",
        help="Arguments for the system (comparison method, learning "
        "algorithm and parameters...).")
    detail_group.add_argument("-o", "--output_dir",
        help="(Empty) directory for storing output generated by this "
        "experiment. Subdirectories for different folds will be "
        "generated automatically.")
    # store a real boolean; the old string default "False" was truthy
    # and silently disabled the empty-directory check below
    detail_group.add_argument("--output_dir_overwrite",
                              action="store_true", default=False)
    detail_group.add_argument("-p", "--output_prefix",
        help="Prefix to be added to output filenames, e.g., the name of "
        "the data set, fold, etc. Output files will be stored as "
        "OUTPUT_DIR/PREFIX-RUN_ID.txt")
    detail_group.add_argument("-e", "--experimenter",
        help="Experimenter type.")
    detail_group.add_argument("-sd", "--seed", type=int)

    # run the parser
    if args_str:
        args = parser.parse_known_args(args_str.split())[0]
    else:
        args = parser.parse_known_args()[0]

    # determine whether to use config file or detailed args
    self.experiment_args = None
    self.args_file = args.file
    if args.file:
        with open(args.file) as config_file:
            # pass an explicit Loader; a bare yaml.load is deprecated
            self.experiment_args = yaml.load(config_file,
                                             Loader=yaml.Loader)
        # overwrite with command-line options if given
        for arg, value in vars(args).items():
            if value:
                self.experiment_args[arg] = value
    else:
        self.experiment_args = vars(args)

    # workaround - check if we have all the arguments needed
    required = ["training_queries", "test_queries", "feature_count",
                "num_runs", "num_queries", "user_model",
                "user_model_args", "system", "system_args", "output_dir"]
    if not all(key in self.experiment_args for key in required):
        parser.print_help()
        sys.exit("Missing required arguments, please check the program "
                 "arguments or configuration file. %s" %
                 self.experiment_args)

    # set default values for optional arguments
    if "query_sampling_method" not in self.experiment_args:
        self.experiment_args["query_sampling_method"] = "random"
    if "output_dir_overwrite" not in self.experiment_args:
        self.experiment_args["output_dir_overwrite"] = False
    if "experimenter" not in self.experiment_args:
        self.experiment_args["experimenter"] = \
            "experiment.LearningExperiment.LearningExperiment"
    if "evaluation" not in self.experiment_args:
        self.experiment_args["evaluation"] = "evaluation.NdcgEval"
    if "processes" not in self.experiment_args:
        self.experiment_args["processes"] = 0
    if "seed" not in self.experiment_args:
        np.random.seed(42)
    else:
        np.random.seed(self.experiment_args['seed'])

    # locate or create directory for the current fold
    if not os.path.exists(self.experiment_args["output_dir"]):
        os.makedirs(self.experiment_args["output_dir"])
    elif not self.experiment_args["output_dir_overwrite"] and \
            os.listdir(self.experiment_args["output_dir"]):
        # make sure the output directory is empty
        raise Exception(
            "Output dir %s is not an empty directory. Please use a "
            "different directory, or move contents out of the way." %
            self.experiment_args["output_dir"])

    logging.basicConfig(
        format='%(levelname)s %(module)s %(asctime)s: %(message)s',
        level=logging.INFO)
    logging.info("Arguments: %s" % self.experiment_args)
    # print the arguments used in this execution
    for k, v in sorted(self.experiment_args.items()):
        logging.info("\t%s: %s" % (k, v))
    config_bk = os.path.join(self.experiment_args["output_dir"],
                             "config_bk.yml")
    logging.info("Backing up configuration to: %s" % config_bk)
    with open(config_bk, "w") as config_bk_file:
        yaml.dump(self.experiment_args, config_bk_file,
                  default_flow_style=False)

    # load training and test queries
    training_file = self.experiment_args["training_queries"]
    test_file = self.experiment_args["test_queries"]
    self.feature_count = self.experiment_args["feature_count"]
    logging.info("Loading training data: %s " % training_file)
    self.training_queries = load_queries(training_file,
                                         self.feature_count)
    logging.info("... found %d queries." %
                 self.training_queries.get_size())
    logging.info("Loading test data: %s " % test_file)
    self.test_queries = load_queries(test_file, self.feature_count)
    logging.info("... found %d queries." % self.test_queries.get_size())

    # initialize and run the experiment num_runs times
    self.num_runs = self.experiment_args["num_runs"]
    self.output_dir = self.experiment_args["output_dir"]
    self.output_prefix = self.experiment_args["output_prefix"]
    self.experimenter = get_class(self.experiment_args["experimenter"])
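For reference, a minimal sketch of building a config file that satisfies the required-argument check above. Every path, class name, and argument string below is a placeholder, and the constructor call is commented out because the data files do not exist:

import yaml

# all values are illustrative; user_model_args / system_args syntax
# depends on the user model and system classes actually used
config = {
    "training_queries": "data/Fold1/train.txt",
    "test_queries": "data/Fold1/test.txt",
    "feature_count": 64,
    "num_runs": 5,
    "num_queries": 1000,
    "user_model": "environment.CascadeUserModel",
    "user_model_args": "--p_click 0:0.0,1:1.0 --p_stop 0:0.0,1:0.0",
    "system": "retrieval_system.ListwiseLearningSystem",
    "system_args": "--init_weights random",
    "output_dir": "outdir",
}
with open("config.yml", "w") as f:
    yaml.dump(config, f, default_flow_style=False)
# experiment = TheExperimentClass("-f config.yml")
# (hypothetical name for whichever class defines the __init__ above)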
def __init__(self):
    # parse arguments
    parser = argparse.ArgumentParser(description="""Meta experiment""")
    file_group = parser.add_argument_group("FILE")
    file_group.add_argument("-f", "--file", help="Filename of the config "
                            "file from which the experiment details "
                            "should be read.")
    # option 2: specify all experiment details as arguments
    detail_group = parser.add_argument_group("DETAILS")
    detail_group.add_argument("-p", "--platform",
                              help="Specify 'local' or 'conf'.")
    detail_group.add_argument('--data', type=str, nargs="+",
                              help="Data in the following format: "
                              "dataset,d,r such that a data file can be "
                              "found in datadir/dataset/Fold1/train.txt")
    detail_group.add_argument('--um', nargs="+")
    detail_group.add_argument('--uma', type=str, nargs="+",
                              help="User model arguments, one string per "
                              "model: um,car, followed by car click "
                              "probabilities and car stop probabilities.")
    detail_group.add_argument('--analysis', nargs="*")
    detail_group.add_argument('--data_dir')
    detail_group.add_argument('--output_base')
    detail_group.add_argument('--experiment_name')
    detail_group.add_argument("-r", "--rerun", action="store_true",
                              default=False,
                              help="Rerun last experiment.")
    detail_group.add_argument("--queue_name", type=str)
    args = parser.parse_known_args()[0]

    logging.basicConfig(format='%(asctime)s %(module)s: %(message)s',
                        level=logging.INFO)

    # determine whether to use config file or detailed args
    self.experiment_args = None
    if args.file:
        with open(args.file) as config_file:
            config = yaml.load(config_file, Loader=yaml.Loader)
        self.experiment_args = config
        try:
            self.meta_args = vars(parser.parse_known_args(
                self.experiment_args["meta"].split())[0])
        except KeyError:
            parser.error("Please make sure there is a 'meta' section "
                         "present in the config file")
        # overwrite with command-line options if given
        for arg, value in vars(args).items():
            if value:
                self.meta_args[arg] = value
    else:
        self.meta_args = vars(args)

    # meta options (and the 'meta' key itself) must not leak into the
    # per-run experiment arguments
    for k in list(self.meta_args.keys()) + ["meta"]:
        if k in self.experiment_args:
            del self.experiment_args[k]

    if self.meta_args["platform"] == "local":
        self.run = self.run_local
    elif self.meta_args["platform"] == "conf":
        self.run = self.run_conf
    else:
        parser.error("Please specify a valid platform.")

    # parse the user model specifications: each entry names the model
    # class and the result-list cardinality, then gives per-rank click
    # and stop probabilities
    usermodels = {}
    for umstr in self.meta_args["uma"]:
        parts = umstr.split(',')
        um, car = parts[:2]
        car = int(car)
        if len(parts) != car * 2 + 2:
            parser.error("Error in uma: expected %d click and %d stop "
                         "probabilities" % (car, car))
        p_click = ", ".join(parts[2:2 + car])
        p_stop = ", ".join(parts[2 + car:])
        if um not in usermodels:
            usermodels[um] = {}
        usermodels[um][car] = "--p_click %s --p_stop %s" % \
            (p_click, p_stop)

    # find a fresh version directory (or, when rerunning, the latest one)
    basedir = os.path.join(os.path.abspath(self.meta_args["output_base"]),
                           self.meta_args["experiment_name"])
    i = 0
    while os.path.exists(os.path.join(basedir, "v%03d" % i)):
        i += 1
    if i > 0 and self.meta_args["rerun"]:
        i -= 1
    logging.info("Running experiment v%03d" % i)
    basedir = os.path.join(basedir, "v%03d" % i)
    if not os.path.exists(basedir):
        os.makedirs(basedir)
    logging.info("Results appear in %s" % basedir)

    config_bk = os.path.join(basedir, "meta_config_bk.yml")
    with open(config_bk, "w") as config_bk_file:
        yaml.dump(self.meta_args, config_bk_file,
                  default_flow_style=False, Dumper=yaml.Dumper)

    # build one configuration per (run, user model, data set, fold)
    skip = 0
    self.configurations = []
    for run_id in range(self.experiment_args["num_runs"]):
        for um in self.meta_args["um"]:
            for dstr in self.meta_args["data"]:
                dparts = dstr.split(',')
                data, d, r = dparts[:3]
                d, r = int(d), int(r)
                user_model_args = usermodels[um][r]
                folds = glob.glob(os.path.join(
                    os.path.abspath(self.meta_args["data_dir"]),
                    data, "Fold*"))
                for fold in folds:
                    args = self.experiment_args.copy()
                    if len(dparts) > 3:
                        selected_weights = ",".join(dparts[3:])
                        args["system_args"] += " --selected_weights " + \
                            selected_weights
                    args["data_dir"] = self.meta_args["data_dir"]
                    args["fold_dir"] = fold
                    args["feature_count"] = d
                    args["user_model_args"] = user_model_args
                    args["output_dir"] = os.path.join(
                        basedir, 'output', um, data,
                        os.path.basename(fold))
                    args["output_prefix"] = os.path.basename(fold)
                    args["run_id"] = run_id
                    if self.meta_args["rerun"]:
                        # only re-add configurations whose output is
                        # missing
                        if not os.path.exists(os.path.join(
                                args["output_dir"],
                                "%s-%d.txt.gz" % (args["output_prefix"],
                                                  run_id))):
                            self.configurations.append(args)
                        else:
                            skip += 1
                    else:
                        self.configurations.append(args)
    logging.info("Created %d configurations (and %d skipped)" % (
        len(self.configurations), skip))

    self.analytics = []
    if self.meta_args["analysis"]:
        for analyse in self.meta_args["analysis"]:
            aclass = get_class(analyse)
            self.analytics.append(aclass(basedir))