intercept = val elif opt == "--lambda": param = val elif opt == "--lambda_range": param_range = val.split(",") elif opt == "--export_params": export_params = val elif opt in ("-l", "--log"): log_file = val elif opt in ("-h", "--help"): usage() sys.exit(0) else: usage(1) log_utils.config_logging(log_file) utils.assert_option_not_none(in_file, "Input file required", usage) utils.assert_option_not_none(out_dir, "Output directory required", usage) utils.assert_option_not_none(model, "Model to be trained required", usage) utils.assert_option_not_none(arg_space, "Argument space(s) file(s) required", usage) utils.assert_option_not_none(phrase_space, "Phrase space file required", usage) crossvalidation = eval(crossvalidation) intercept = eval(intercept) utils.assert_bool(intercept, "intercept must be True/False", usage) utils.assert_bool(crossvalidation, "crossvalidation must be True/False", usage)
in_dir = val elif opt in ("-m", "--sim_measures"): sim_measures = val.split(",") elif opt in ("-s", "--space"): spaces = val.split(",") elif opt in ("-c", "--columns"): columns = val.split(",") elif opt in ("-l", "--log"): log_file = val elif opt in ("-h", "--help"): usage() sys.exit(0) else: usage(1) log_utils.config_logging(log_file) utils.assert_option_not_none(in_file, "Input file required", usage) utils.assert_option_not_none(out_dir, "Output directory required", usage) utils.assert_option_not_none(sim_measures, "Similarity measures required", usage) utils.assert_option_not_none(columns, "Columns to be read from input file required", usage) if not in_dir is None: compute_sim_batch(in_file, columns, out_dir, sim_measures, in_dir) else: utils.assert_option_not_none(spaces, "Semantic space file required", usage) compute_sim(in_file, columns, out_dir, sim_measures, spaces) if __name__ == '__main__': main(sys.argv)
def main(sys_argv):
    """Parse settings and build peripheral space(s) on top of a core space.

    Settings are read from the ``build_peripheral_space`` section of a
    config file when exactly one positional argument is present, and are
    then overridden by any command-line options.

    When ``core_in_dir`` is set, ``build_space_batch`` is called; otherwise a
    single core space file is required and ``build_space`` is called.

    Args:
        sys_argv: Full argument vector; element 0 is ignored.
    """
    try:
        opts, argv = getopt.getopt(sys_argv[1:], "hi:o:c:l:", [
            "help", "input=", "output=", "core=", "log=", "input_format=",
            "output_format=", "core_in_dir=", "core_filter=", "gz="
        ])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    out_dir = None
    in_file_prefix = None
    core_space_file = None
    # NOTE(review): the default log name mentions "core" although this is the
    # peripheral-space entry point; left unchanged to keep existing behavior.
    log_file = './build_core_space.log'
    in_format = None
    out_format = None
    core_in_dir = None
    core_filter = ""
    gz = "False"

    section = "build_peripheral_space"

    # A single positional argument is interpreted as a config file path.
    if len(argv) == 1:
        config_file = argv[0]
        config = ConfigParser()
        config.read(config_file)
        out_dir = utils.config_get(section, config, "output", None)
        in_file_prefix = utils.config_get(section, config, "input", None)
        core_space_file = utils.config_get(section, config, "core", None)
        core_in_dir = utils.config_get(section, config, "core_in_dir", None)
        core_filter = utils.config_get(section, config, "core_filter", "")
        log_file = utils.config_get(section, config, "log",
                                    './build_core_space.log')
        in_format = utils.config_get(section, config, "input_format", None)
        out_format = utils.config_get(section, config, "output_format", None)
        gz = utils.config_get(section, config, "gz", gz)

    # Command-line options take precedence over config-file values.
    for opt, val in opts:
        if opt in ("-i", "--input"):
            in_file_prefix = val
        elif opt in ("-o", "--output"):
            out_dir = val
        elif opt == "--gz":
            gz = val
        elif opt in ("-c", "--core"):
            core_space_file = val
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt == "--input_format":
            in_format = val
        elif opt == "--output_format":
            out_format = val
        elif opt == "--core_in_dir":
            core_in_dir = val
        elif opt == "--core_filter":
            core_filter = val
        elif opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        else:
            usage(1)

    log_utils.config_logging(log_file)

    utils.assert_option_not_none(in_file_prefix,
                                 "Input file prefix required", usage)
    utils.assert_option_not_none(out_dir,
                                 "Output directory required", usage)
    utils.assert_option_not_none(in_format,
                                 "Input file format required", usage)

    # NOTE(security): eval() executes text supplied on the command line or in
    # the config file. Kept for compatibility; ast.literal_eval would be a
    # safer way to parse "True"/"False".
    gz = eval(gz)
    utils.assert_bool(gz, "--gz value must be True/False", usage)

    if core_in_dir is not None:
        build_space_batch(in_file_prefix, in_format, out_dir, out_format,
                          core_in_dir, core_filter, gz)
    else:
        # The input prefix was validated above; what is missing on this path
        # is the core space (-c/--core), so say so explicitly.
        utils.assert_option_not_none(core_space_file,
                                     "Core space file required", usage)
        build_space(in_file_prefix, in_format, out_dir, out_format,
                    core_space_file, gz)
def main(sys_argv):
    """Parse settings and evaluate similarity scores against gold scores.

    Values come from the ``evaluate_similarities`` section of a config file
    (when exactly one positional argument is given) and are overridden by
    command-line options. With ``in_dir`` set, ``evaluate_sim_batch`` is
    called; otherwise an input file is required and ``evaluate_sim`` is
    called.

    Args:
        sys_argv: Full argument vector; element 0 is ignored.

    Raises:
        ValueError: If the columns option does not hold exactly two entries.
    """
    short_flags = "hi:m:c:l:"
    long_flags = [
        "help", "input=", "correlation_measure=", "columns=", "log=",
        "in_dir=", "filter="
    ]
    try:
        opts, argv = getopt.getopt(sys_argv[1:], short_flags, long_flags)
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    section = "evaluate_similarities"
    in_file = in_dir = corr_measures = columns = log_file = None
    filter_ = ""

    def split_or_none(raw):
        # Comma-separated config values become lists; missing ones stay None.
        return None if raw is None else raw.split(",")

    if len(argv) == 1:
        cfg = ConfigParser()
        cfg.read(argv[0])
        in_file = utils.config_get(section, cfg, "input", None)
        in_dir = utils.config_get(section, cfg, "in_dir", None)
        filter_ = utils.config_get(section, cfg, "filter", filter_)
        corr_measures = split_or_none(
            utils.config_get(section, cfg, "correlation_measure", None))
        columns = split_or_none(
            utils.config_get(section, cfg, "columns", None))
        log_file = utils.config_get(section, cfg, "log", None)

    # Options given on the command line win over the config file.
    for flag, value in opts:
        if flag in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif flag in ("-l", "--log"):
            log_file = value
        elif flag == "--filter":
            filter_ = value
        elif flag == "--in_dir":
            in_dir = value
        elif flag in ("-c", "--columns"):
            columns = value.split(",")
        elif flag in ("-m", "--correlation_measure"):
            corr_measures = value.split(",")
        elif flag in ("-i", "--input"):
            in_file = value
        else:
            usage(1)

    log_utils.config_logging(log_file)

    utils.assert_option_not_none(corr_measures,
                                 "Correlation measures required", usage)
    utils.assert_option_not_none(
        columns, "Columns to be read from input file required", usage)

    if len(columns) != 2:
        raise ValueError(
            "Columns (-c) field should contain two comma-separated integers (e.g. -c 3,4)"
        )

    if in_dir is None:
        utils.assert_option_not_none(in_file, "Input file required", usage)
        evaluate_sim(in_file, columns, corr_measures)
    else:
        evaluate_sim_batch(in_dir, columns, corr_measures, filter_)
def main(sys_argv):
    """Parse settings and apply a composition model to the input data.

    Values come from the ``apply_composition`` section of a config file
    (when exactly one positional argument is given) and are overridden by
    command-line options. ``alpha``, ``beta`` and ``lambda`` are converted to
    floats when supplied, then everything is handed to ``apply_model``.

    Args:
        sys_argv: Full argument vector; element 0 is ignored.

    Raises:
        ValueError: If alpha, beta or lambda is given but is not a valid
            float literal.
    """
    try:
        opts, argv = getopt.getopt(sys_argv[1:], "hi:o:m:a:l:", [
            "help", "input=", "output=", "model=", "alpha=", "beta=",
            "lambda=", "arg_space=", "load_model=", "output_format=", "log="
        ])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    out_dir = None
    in_file = None
    model = None
    arg_space = None
    trained_model = None
    alpha = None
    beta = None
    lambda_ = None
    log_file = None
    out_format = None

    section = "apply_composition"

    # A single positional argument is interpreted as a config file path.
    if len(argv) == 1:
        config_file = argv[0]
        config = ConfigParser()
        config.read(config_file)
        out_dir = utils.config_get(section, config, "output", None)
        in_file = utils.config_get(section, config, "input", None)
        model = utils.config_get(section, config, "model", None)
        trained_model = utils.config_get(section, config, "load_model", None)
        arg_space = utils.config_get(section, config, "arg_space", None)
        if arg_space is not None:
            arg_space = arg_space.split(",")
        alpha = utils.config_get(section, config, "alpha", None)
        beta = utils.config_get(section, config, "beta", None)
        lambda_ = utils.config_get(section, config, "lambda", None)
        log_file = utils.config_get(section, config, "log", None)
        out_format = utils.config_get(section, config, "output_format", None)

    # The stray debugging print of the parsed options was removed here: it
    # wrote to stdout on every invocation.

    # Command-line options take precedence over config-file values.
    for opt, val in opts:
        if opt in ("-i", "--input"):
            in_file = val
        elif opt in ("-o", "--output"):
            out_dir = val
        elif opt in ("-m", "--model"):
            model = val
        elif opt in ("-a", "--arg_space"):
            arg_space = val.split(",")
        elif opt == "--load_model":
            trained_model = val
        elif opt == "--alpha":
            alpha = val
        elif opt == "--beta":
            beta = val
        elif opt == "--lambda":
            lambda_ = val
        elif opt == "--output_format":
            out_format = val
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        else:
            usage(1)

    log_utils.config_logging(log_file)

    utils.assert_option_not_none(in_file, "Input file required", usage)
    utils.assert_option_not_none(out_dir, "Output directory required", usage)
    utils.assert_xor_options(
        model, trained_model,
        "(Only) one of model name (-m) or file of model object (--load_model) are required!",
        usage)
    utils.assert_option_not_none(arg_space,
                                 "Argument space(s) file(s) required", usage)

    # Numeric parameters arrive as strings; convert only those supplied.
    if alpha is not None:
        alpha = float(alpha)
    if beta is not None:
        beta = float(beta)
    if lambda_ is not None:
        lambda_ = float(lambda_)

    apply_model(in_file, out_dir, model, trained_model, arg_space,
                alpha, beta, lambda_, out_format)
def main(sys_argv):
    """Parse settings and compute similarity scores for the input pairs.

    Values come from the ``compute_similarities`` section of a config file
    (when exactly one positional argument is given) and are overridden by
    command-line options. With ``in_dir`` set, ``compute_sim_batch`` is
    called; otherwise at least one space file is required and
    ``compute_sim`` is called.

    Args:
        sys_argv: Full argument vector; element 0 is ignored.
    """
    short_flags = "hi:o:s:m:c:l:"
    long_flags = [
        "help", "input=", "output=", "sim_measures=", "space=", "in_dir=",
        "columns=", "log="
    ]
    try:
        opts, argv = getopt.getopt(sys_argv[1:], short_flags, long_flags)
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    section = "compute_similarities"
    out_dir = in_file = sim_measures = spaces = None
    columns = log_file = in_dir = None

    def split_or_none(raw):
        # Comma-separated config values become lists; missing ones stay None.
        return None if raw is None else raw.split(",")

    if len(argv) == 1:
        cfg = ConfigParser()
        cfg.read(argv[0])
        out_dir = utils.config_get(section, cfg, "output", None)
        in_file = utils.config_get(section, cfg, "input", None)
        in_dir = utils.config_get(section, cfg, "in_dir", None)
        sim_measures = split_or_none(
            utils.config_get(section, cfg, "sim_measures", None))
        spaces = split_or_none(
            utils.config_get(section, cfg, "space", None))
        columns = split_or_none(
            utils.config_get(section, cfg, "columns", None))
        log_file = utils.config_get(section, cfg, "log", None)

    # Options given on the command line win over the config file.
    for flag, value in opts:
        if flag in ("-h", "--help"):
            usage()
            sys.exit(0)
        elif flag in ("-l", "--log"):
            log_file = value
        elif flag in ("-c", "--columns"):
            columns = value.split(",")
        elif flag in ("-s", "--space"):
            spaces = value.split(",")
        elif flag in ("-m", "--sim_measures"):
            sim_measures = value.split(",")
        elif flag == "--in_dir":
            in_dir = value
        elif flag in ("-o", "--output"):
            out_dir = value
        elif flag in ("-i", "--input"):
            in_file = value
        else:
            usage(1)

    log_utils.config_logging(log_file)

    utils.assert_option_not_none(in_file, "Input file required", usage)
    utils.assert_option_not_none(out_dir, "Output directory required", usage)
    utils.assert_option_not_none(sim_measures,
                                 "Similarity measures required", usage)
    utils.assert_option_not_none(
        columns, "Columns to be read from input file required", usage)

    if in_dir is None:
        utils.assert_option_not_none(spaces,
                                     "Semantic space file required", usage)
        compute_sim(in_file, columns, out_dir, sim_measures, spaces)
    else:
        compute_sim_batch(in_file, columns, out_dir, sim_measures, in_dir)
def main(sys_argv):
    """Parse settings and compute nearest neighbours for the input words.

    Values come from the ``compute_neighbours`` section of a config file
    (when exactly one positional argument is given) and are overridden by
    command-line options. The neighbour count defaults to 20 and is
    converted to ``int`` before ``compute_neighbours`` is called.

    Args:
        sys_argv: Full argument vector; element 0 is ignored.

    Raises:
        IOError/OSError: If the config file cannot be opened.
        ValueError: If the neighbour count is not an integer literal.
    """
    try:
        opts, argv = getopt.getopt(sys_argv[1:], "hi:o:s:m:n:l:", [
            "help", "input=", "output=", "sim_measures=", "space=", "log=",
            "no_neighbours="
        ])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    section = "compute_neighbours"

    out_dir = None
    in_file = None
    sim_measure = None
    spaces = None
    log_file = None
    no_neighbours = "20"

    # A single positional argument is interpreted as a config file path.
    if len(argv) == 1:
        config_file = argv[0]
        # ConfigParser.read() silently skips unreadable files, so open the
        # file once to surface a missing/unreadable config as an error.
        with open(config_file):
            pass
        config = ConfigParser()
        config.read(config_file)
        out_dir = utils.config_get(section, config, "output", None)
        in_file = utils.config_get(section, config, "input", None)
        sim_measure = utils.config_get(section, config, "sim_measure", None)
        spaces = utils.config_get(section, config, "space", None)
        if spaces is not None:
            spaces = spaces.split(",")
        no_neighbours = utils.config_get(section, config, "no_neighbours",
                                         no_neighbours)
        log_file = utils.config_get(section, config, "log", None)

    # Command-line options take precedence over config-file values.
    for opt, val in opts:
        if opt in ("-i", "--input"):
            in_file = val
        elif opt in ("-o", "--output"):
            out_dir = val
        # getopt reports long options under their registered name, so both
        # "--sim_measure" (expanded as a unique prefix) and "--sim_measures"
        # arrive here as "--sim_measures". Accept both spellings; previously
        # the long form always fell through to usage(1).
        elif opt in ("-m", "--sim_measure", "--sim_measures"):
            sim_measure = val
        elif opt in ("-s", "--space"):
            spaces = val.split(",")
        elif opt in ("-n", "--no_neighbours"):
            no_neighbours = val
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        else:
            usage(1)

    log_utils.config_logging(log_file)

    no_neighbours = int(no_neighbours)

    utils.assert_option_not_none(in_file, "Input file required", usage)
    utils.assert_option_not_none(out_dir, "Output directory required", usage)
    utils.assert_option_not_none(sim_measure,
                                 "Similarity measure required", usage)
    utils.assert_option_not_none(spaces,
                                 "Semantic space file required", usage)

    compute_neighbours(in_file, no_neighbours, out_dir, sim_measure, spaces)
def main(sys_argv):
    """Parse settings and train a composition model.

    Values come from the ``train_composition`` section of a config file
    (when exactly one positional argument is given) and are overridden by
    command-line options. Boolean-like options are evaluated and checked,
    lambda values are converted to floats, and the result is handed to
    ``train_model``.

    Args:
        sys_argv: Full argument vector; element 0 is ignored.

    Raises:
        ValueError: If a lambda value is not a valid float literal.
    """
    try:
        opts, argv = getopt.getopt(sys_argv[1:], "hi:o:m:r:a:p:l:", [
            "help", "input=", "output=", "model=", "regression=",
            "intercept=", "arg_space=", "phrase_space=", "export_params=",
            "log=", "crossvalidation=", "lambda_range=", "lambda="
        ])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    out_dir = None
    in_file = None
    model = None
    regression = None
    crossvalidation = "False"
    intercept = "True"
    param_range = None
    arg_space = None
    phrase_space = None
    export_params = "False"
    log_file = None
    param = None

    section = "train_composition"

    # A single positional argument is interpreted as a config file path.
    if len(argv) == 1:
        config_file = argv[0]
        config = ConfigParser()
        config.read(config_file)
        out_dir = utils.config_get(section, config, "output", None)
        in_file = utils.config_get(section, config, "input", None)
        model = utils.config_get(section, config, "model", None)
        regression = utils.config_get(section, config, "regression", None)
        crossvalidation = utils.config_get(section, config,
                                           "crossvalidation", crossvalidation)
        intercept = utils.config_get(section, config, "intercept", intercept)
        param_range = utils.config_get(section, config, "lambda_range", None)
        if param_range is not None:
            param_range = param_range.split(",")
        param = utils.config_get(section, config, "lambda", None)
        arg_space = utils.config_get(section, config, "arg_space", None)
        if arg_space is not None:
            arg_space = arg_space.split(",")
        phrase_space = utils.config_get(section, config, "phrase_space", None)
        export_params = utils.config_get(section, config, "export_params",
                                         export_params)
        log_file = utils.config_get(section, config, "log", None)

    # Command-line options take precedence over config-file values.
    for opt, val in opts:
        if opt in ("-i", "--input"):
            in_file = val
        elif opt in ("-o", "--output"):
            out_dir = val
        elif opt in ("-m", "--model"):
            model = val
        elif opt in ("-a", "--arg_space"):
            arg_space = val.split(",")
        elif opt in ("-p", "--phrase_space"):
            phrase_space = val
        elif opt in ("-r", "--regression"):
            regression = val
        elif opt == "--crossvalidation":
            crossvalidation = val
        elif opt == "--intercept":
            intercept = val
        elif opt == "--lambda":
            param = val
        elif opt == "--lambda_range":
            param_range = val.split(",")
        elif opt == "--export_params":
            export_params = val
        elif opt in ("-l", "--log"):
            log_file = val
        elif opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        else:
            usage(1)

    log_utils.config_logging(log_file)

    utils.assert_option_not_none(in_file, "Input file required", usage)
    utils.assert_option_not_none(out_dir, "Output directory required", usage)
    utils.assert_option_not_none(model, "Model to be trained required", usage)
    utils.assert_option_not_none(arg_space,
                                 "Argument space(s) file(s) required", usage)
    utils.assert_option_not_none(phrase_space,
                                 "Phrase space file required", usage)

    # NOTE(security): eval() executes text supplied on the command line or in
    # the config file. Kept for compatibility; ast.literal_eval would be a
    # safer way to parse "True"/"False".
    crossvalidation = eval(crossvalidation)
    intercept = eval(intercept)
    utils.assert_bool(intercept, "intercept must be True/False", usage)
    utils.assert_bool(crossvalidation,
                      "crossvalidation must be True/False", usage)

    export_params = eval(export_params)
    utils.assert_bool(export_params,
                      "export_params must be True/False", usage)

    if param is not None:
        param = float(param)
    if param_range is not None:
        # Use a distinct loop name: reusing "param" would overwrite the
        # lambda value parsed above on interpreters where list-comprehension
        # variables leak into the enclosing scope (Python 2).
        param_range = [float(value) for value in param_range]

    # Ridge regression without cross-validation needs an explicit lambda.
    if not crossvalidation and regression == "ridge":
        utils.assert_option_not_none(
            param,
            "Cannot run (no-crossvalidation) RidgeRegression with no lambda value!",
            usage)

    train_model(in_file, out_dir, model, arg_space, phrase_space, regression,
                crossvalidation, intercept, param, param_range, export_params)
def main(sys_argv):
    """Parse settings and build core space(s) from co-occurrence data.

    Values come from the ``build_core_space`` section of a config file (when
    exactly one positional argument is given) and are overridden by
    command-line options. Weighting, selection, reduction and normalization
    each default to ``[None]`` and are otherwise comma-split lists. The
    collected settings are passed to ``build_spaces``.

    Args:
        sys_argv: Full argument vector; element 0 is ignored.
    """
    short_flags = "hi:o:w:s:r:n:l:"
    long_flags = [
        "help", "input=", "output=", "weighting=", "selection=",
        "reduction=", "normalization=", "log=", "gz=", "input_format=",
        "output_format="
    ]
    try:
        opts, argv = getopt.getopt(sys_argv[1:], short_flags, long_flags)
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(1)

    section = "build_core_space"
    out_dir = in_file_prefix = log_file = in_format = out_format = None
    weightings = [None]
    selections = [None]
    reductions = [None]
    normalizations = [None]
    gz = "False"

    if len(argv) == 1:
        cfg_path = argv[0]
        # Opening the file first makes an unreadable config raise here.
        with open(cfg_path):
            pass
        cfg = ConfigParser()
        cfg.read(cfg_path)

        def listed(key):
            # Keep the [None] placeholder when the key is absent; otherwise
            # split the comma-separated value.
            raw = utils.config_get(section, cfg, key, [None])
            return raw if raw == [None] else raw.split(",")

        out_dir = utils.config_get(section, cfg, "output", None)
        in_file_prefix = utils.config_get(section, cfg, "input", None)
        weightings = listed("weighting")
        selections = listed("selection")
        reductions = listed("reduction")
        normalizations = listed("normalization")
        log_file = utils.config_get(section, cfg, "log", None)
        in_format = utils.config_get(section, cfg, "input_format", None)
        out_format = utils.config_get(section, cfg, "output_format", None)
        gz = utils.config_get(section, cfg, "gz", gz)

    # Options given on the command line win over the config file.
    for flag, value in opts:
        if flag in ("-h", "--help"):
            usage(0)
        elif flag == "--output_format":
            out_format = value
        elif flag == "--input_format":
            in_format = value
        elif flag in ("-l", "--log"):
            log_file = value
        elif flag in ("-n", "--normalization"):
            normalizations = value.split(",")
        elif flag in ("-r", "--reduction"):
            reductions = value.split(",")
        elif flag in ("-s", "--selection"):
            selections = value.split(",")
        elif flag in ("-w", "--weighting"):
            weightings = value.split(",")
        elif flag == "--gz":
            gz = value
        elif flag in ("-o", "--output"):
            out_dir = value
        elif flag in ("-i", "--input"):
            in_file_prefix = value
        else:
            usage(1)

    # Logging is configured only when a log file was requested.
    if log_file is not None:
        log_utils.config_logging(log_file)

    utils.assert_option_not_none(in_file_prefix,
                                 "Input file prefix required", usage)
    utils.assert_option_not_none(out_dir,
                                 "Output directory required", usage)
    utils.assert_option_not_none(in_format, "Input format required", usage)

    # The gz flag arrives as text and is evaluated before the bool check.
    gz = eval(gz)
    utils.assert_bool(gz, "--gz value must be True/False", usage)

    build_spaces(in_file_prefix, in_format, out_dir, out_format, weightings,
                 selections, reductions, normalizations, gz)