import argparse
import os

# package-local helpers assumed in scope: load_from_dir, save_json,
# print_json, Language_modeling


def main():
    # config = load_config()
    # print(config)
    parser = argparse.ArgumentParser()
    parser.add_argument("embeddings")
    parser.add_argument("--window_size", default=5, type=int)
    # note: argparse does not parse booleans; any value passed on the command
    # line for --test arrives as a truthy string, so the flag effectively
    # stays True unless the default is changed in code
    parser.add_argument("--test", default=True, help='use small test dataset')
    parser.add_argument("--method", default='lstm',
                        choices=['lr', '2FFNN', 'lstm'],
                        help='name of method')
    parser.add_argument("--path_out", default=False,
                        help="destination folder to save results")
    args = parser.parse_args()
    embeddings = load_from_dir(args.embeddings)
    # print("embeddings", embeddings)
    language_modeling = Language_modeling(window_size=args.window_size,
                                          method=args.method,
                                          test=args.test)
    results = language_modeling.get_result(embeddings)
    if args.path_out:
        if os.path.isdir(args.path_out) or args.path_out.endswith("/"):
            name_file_out = os.path.join(args.path_out,
                                         "language_modeling", "results.json")
            save_json(results, name_file_out)
        else:
            save_json(results, args.path_out)
    else:
        print_json(results)
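# Example invocation of the language-modeling entry point (the script name is
# a placeholder; the flags are the ones defined above):
#
#   python lm_benchmark.py ./my_embeddings --method lstm --window_size 5 \
#       --path_out ./results/
#
# With an existing directory or a trailing "/" the results are written to
# <path_out>/language_modeling/results.json; with a plain file path they go
# to that file; without --path_out they are printed to stdout.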
# method of an embeddings container class; module-level imports assumed:
# os, numpy as np, and a package-local save_json
def save_to_dir(self, path):
    """Persist the embedding to `path`: vocabulary, matrix, and metadata."""
    os.makedirs(path, exist_ok=True)
    self.vocabulary.save_to_dir(path)
    # alternative serializations, left commented out:
    # self.matrix.tofile(os.path.join(path, "vectors.bin"))
    # np.save(os.path.join(path, "vectors.npy"), self.matrix)
    self.save_matr_to_hdf5(path)
    save_json(self.metadata, os.path.join(path, "metadata.json"))
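# Sketch of the directory layout save_to_dir produces, assuming
# save_matr_to_hdf5 writes the matrix as an HDF5 file (its exact file name is
# decided by that helper and is a guess here):
#
#   <path>/
#       metadata.json   # embedding metadata, via save_json
#       vectors.h5      # matrix, via save_matr_to_hdf5 (assumed name)
#       ...             # vocabulary files from vocabulary.save_to_dir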
import argparse
import os

# package-local helpers assumed in scope: load_from_dir, select_method,
# save_json, print_json


def main():
    # config = load_config()
    # print(config)
    parser = argparse.ArgumentParser()
    parser.add_argument("embeddings")
    parser.add_argument("dataset")
    parser.add_argument("--method", default="LRCos",
                        help="analogy solving method")
    parser.add_argument("--path_out",
                        help="destination folder to save results")
    args = parser.parse_args()
    embeddings = load_from_dir(args.embeddings)
    # print("embeddings", embeddings)
    benchmark = select_method(args.method)
    results = benchmark.get_result(embeddings, args.dataset)
    if args.path_out:
        if os.path.isdir(args.path_out) or args.path_out.endswith("/"):
            dataset = os.path.basename(os.path.normpath(args.dataset))
            name_file_out = os.path.join(args.path_out, dataset,
                                         args.method, "results.json")
            save_json(results, name_file_out)
        else:
            save_json(results, args.path_out)
    else:
        print_json(results)
import argparse
import os

# package-local helpers assumed in scope: load_from_dir, save_json,
# print_json, Text_classification


def main():
    # config = load_config()
    # print(config)
    parser = argparse.ArgumentParser()
    parser.add_argument("embeddings")
    parser.add_argument("dataset")
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=30,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--layer', '-l', type=int, default=1,
                        help='Number of layers of RNN or MLP following CNN')
    parser.add_argument('--dropout', '-d', type=float, default=0.4,
                        help='Dropout rate')
    parser.add_argument('--model', '-model', default='cnn',
                        choices=['cnn', 'rnn', 'bow'],
                        help='Name of encoder model type')
    parser.add_argument("--path_out", default=False,
                        help="destination folder to save results")
    args = parser.parse_args()
    embeddings = load_from_dir(args.embeddings)
    # print("embeddings", embeddings)
    text_classification = Text_classification(batchsize=args.batchsize,
                                              epoch=args.epoch,
                                              gpu=args.gpu,
                                              layer=args.layer,
                                              dropout=args.dropout,
                                              model=args.model)
    results = text_classification.get_result(embeddings, args.dataset)
    if args.path_out:
        if os.path.isdir(args.path_out) or args.path_out.endswith("/"):
            dataset = os.path.basename(os.path.normpath(args.dataset))
            name_file_out = os.path.join(args.path_out, dataset, "results.json")
            save_json(results, name_file_out)
        else:
            save_json(results, args.path_out)
    else:
        print_json(results)
def save_results(results, path_out, dataset_name):
    # create subdirectories unless explicitly asked not to
    # TODO: add submodules to append to the path
    timestamp = get_time_str()
    if isinstance(results, list):
        task = results[0]["experiment_setup"]["task"]
    else:
        task = results["experiment_setup"]["task"]
    task = task.replace(" ", "_")
    name_file_out = os.path.join(path_out, task, dataset_name,
                                 timestamp, "results.json")
    save_json(results, name_file_out)
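# Worked example of the path save_results builds (illustrative values; the
# timestamp format is whatever get_time_str returns):
#
#   results = {"experiment_setup": {"task": "word similarity"}}
#   save_results(results, "out", "wordsim353")
#   # -> out/word_similarity/wordsim353/<timestamp>/results.json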
import argparse
import importlib
import os

# package-local helpers assumed in scope: load_from_dir, save_json,
# print_json, get_time_str, Dataset


def run_benchmark_by_name(name, args):
    print(name, args)
    print("running ", name)
    # benchmark modules are resolved dynamically by name
    mod = importlib.import_module("vecto.benchmarks." + name)
    parser = argparse.ArgumentParser()
    add_extra_args = getattr(mod, 'add_extra_args')
    add_extra_args(parser)
    parser.add_argument("--path_out", default=None,
                        help="destination folder to save results")
    args = parser.parse_args(args)
    dict_args = vars(args)
    embeddings = load_from_dir(args.embeddings)
    # TODO: this is an ugly hack; use subparsers or something similar
    if name == "language_modeling":
        dataset = Dataset("/tmp/")
        dataset.name = "ptb"
    else:
        dataset = Dataset(args.dataset)
        dict_args.pop("dataset")
    dict_args.pop("embeddings")
    # TODO: not sure if all benchmarks use the dataset arg
    path_out = dict_args.pop("path_out")
    Benchmark = getattr(mod, "Benchmark")
    benchmark = Benchmark(**dict_args)
    print("SHAPE:", embeddings.matrix.shape)
    print("vocab size:", embeddings.vocabulary.cnt_words)
    results = benchmark.run(embeddings, dataset)
    if path_out:
        if os.path.isdir(path_out) or path_out.endswith("/"):
            dataset = dataset.metadata["name"]
            timestamp = get_time_str()
            if isinstance(results, list):
                task = results[0]["experiment_setup"]["task"]
            else:
                task = results["experiment_setup"]["task"]
            name_file_out = os.path.join(path_out, task, dataset,
                                         timestamp, "results.json")
            save_json(results, name_file_out)
        else:
            save_json(results, path_out)
    else:
        print_json(results)
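# A minimal sketch of the contract run_benchmark_by_name expects from a
# benchmark module: it must expose add_extra_args() and a Benchmark class
# whose run() returns results carrying experiment_setup["task"]. Everything
# below is an illustrative placeholder, not an existing vecto module.

def add_extra_args(parser):
    parser.add_argument("embeddings")
    parser.add_argument("dataset")
    parser.add_argument("--alpha", type=float, default=0.5)


class Benchmark:
    def __init__(self, alpha=0.5):
        self.alpha = alpha

    def run(self, embeddings, dataset):
        # a real benchmark would evaluate `embeddings` against `dataset` here
        return {"experiment_setup": {"task": "toy_benchmark"},
                "result": self.alpha}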
def run_with_args(self, args):
    embeddings = load_from_dir(args.embeddings)
    print("SHAPE:", embeddings.matrix.shape)
    results = self.get_result(embeddings, args.dataset)
    if args.path_out:
        if os.path.isdir(args.path_out) or args.path_out.endswith("/"):
            dataset = os.path.basename(os.path.normpath(args.dataset))
            timestamp = get_time_str()
            task = results[0]["experiment_setup"]["task"]
            name_file_out = os.path.join(args.path_out, task, dataset,
                                         timestamp, "results.json")
            save_json(results, name_file_out)
        else:
            save_json(results, args.path_out)
    else:
        print_json(results)
import argparse
from os import path

# package-local helpers assumed in scope: load_from_dir, select_method,
# save_json, print_json


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('embeddings')
    parser.add_argument('dataset')
    parser.add_argument('--method', default='AveragePairwiseCosine',
                        help='Outlier detection method')
    parser.add_argument('--path_out',
                        help='Destination folder to save the results')
    args = parser.parse_args()
    embeddings = load_from_dir(args.embeddings)
    benchmark = select_method(args.method)
    results = benchmark.get_result(embeddings, args.dataset)
    if args.path_out:
        if path.isdir(args.path_out) or args.path_out.endswith('/'):
            dataset = path.basename(path.normpath(args.dataset))
            name_file_out = path.join(args.path_out, dataset,
                                      args.method, 'results.json')
            save_json(results, name_file_out)
        else:
            save_json(results, args.path_out)
    else:
        print_json(results)
def _init_metadata(self, embeddings):
    """Metadata initialization helper."""
    self.metadata = {}
    self.metadata["timestamp"] = {}
    self.metadata["version"] = "ldt v. " + __version__
    self.metadata["class"] = "experiment"
    if hasattr(self, "embeddings"):
        self.metadata["embeddings"] = []
        shared_subpath = check_shared_subpath(embeddings, "")
        for embedding in embeddings:
            meta_path = os.path.join(embedding, "metadata.json")
            if os.path.isfile(meta_path):
                embedding_metadata = load_json(meta_path)
                embedding_metadata["path"] = embedding
            else:
                embedding_metadata = create_metadata_stub(embedding,
                                                          shared_subpath)
                save_json(embedding_metadata, meta_path)
            self.metadata["embeddings"].append(embedding_metadata)
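# Illustrative shape of the metadata produced above (values are made up;
# "version" embeds the real package __version__ at runtime):
example_metadata = {
    "timestamp": {},
    "version": "ldt v. 0.3.0",
    "class": "experiment",
    "embeddings": [
        # one entry per embedding: loaded from metadata.json when present,
        # otherwise a stub created by create_metadata_stub
        {"path": "/embeddings/model_a"},
    ],
}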
import argparse
import os

# package-local helpers assumed in scope: load_from_dir, save_json,
# print_json, Relation_extraction


def main():
    # config = load_config()
    # print(config)
    parser = argparse.ArgumentParser()
    parser.add_argument("embeddings")
    parser.add_argument("dataset")
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of examples in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=1,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--nb_filter', '-nf', type=int, default=100,
                        help='Number of convolution filters')
    parser.add_argument('--filter_length', '-fl', type=int, default=3,
                        help='Convolution filter length')
    parser.add_argument('--hidden_dims', '-hd', type=int, default=100,
                        help='Number of hidden dimensions')
    parser.add_argument('--position_dims', '-pd', type=int, default=100,
                        help='Number of position-embedding dimensions')
    parser.add_argument("--path_out", default=False,
                        help="destination folder to save results")
    args = parser.parse_args()
    embeddings = load_from_dir(args.embeddings)
    # print("embeddings", embeddings)
    # print(args.normalize)
    relation_extraction = Relation_extraction(batchsize=args.batchsize,
                                              epoch=args.epoch,
                                              nb_filter=args.nb_filter,
                                              filter_length=args.filter_length,
                                              hidden_dims=args.hidden_dims,
                                              position_dims=args.position_dims)
    results = relation_extraction.get_result(embeddings, args.dataset)
    if args.path_out:
        if os.path.isdir(args.path_out) or args.path_out.endswith("/"):
            dataset = os.path.basename(os.path.normpath(args.dataset))
            name_file_out = os.path.join(args.path_out, dataset, "results.json")
            save_json(results, name_file_out)
        else:
            save_json(results, args.path_out)
    else:
        print_json(results)
import argparse
import os

# package-local helpers assumed in scope: load_from_dir, save_json,
# print_json, Similarity


def main():
    # config = load_config()
    # print(config)
    parser = argparse.ArgumentParser()
    parser.add_argument("embeddings")
    parser.add_argument("dataset")
    parser.add_argument("--path_out", default=False,
                        help="destination folder to save results")
    args = parser.parse_args()
    embeddings = load_from_dir(args.embeddings)
    # print("embeddings", embeddings)
    similarity = Similarity()
    results = similarity.get_result(embeddings, args.dataset)
    if args.path_out:
        if os.path.isdir(args.path_out) or args.path_out.endswith("/"):
            dataset = os.path.basename(os.path.normpath(args.dataset))
            name_file_out = os.path.join(args.path_out, dataset, "results.json")
            save_json(results, name_file_out)
        else:
            save_json(results, args.path_out)
    else:
        print_json(results)
import argparse
from os import path

# package-local helpers assumed in scope: load_from_dir, select_method,
# get_time_str, save_json, print_json


def run(options, extra_args):
    parser = argparse.ArgumentParser()
    parser.add_argument('embeddings')
    parser.add_argument('dataset')
    parser.add_argument('--method', default='KMeansCategorization',
                        help='Categorization method')
    parser.add_argument('--path_out',
                        help='Destination folder to save the results')
    args = parser.parse_args(extra_args)
    embeddings = load_from_dir(args.embeddings)
    benchmark = select_method(args.method)
    results = benchmark.get_result(embeddings, args.dataset)
    if args.path_out:
        if path.isdir(args.path_out) or args.path_out.endswith('/'):
            dataset = path.basename(path.normpath(args.dataset))
            timestamp = get_time_str()
            name_file_out = path.join(args.path_out, dataset, args.method,
                                      timestamp, 'results.json')
            save_json(results, name_file_out)
        else:
            save_json(results, args.path_out)
    else:
        print_json(results)
def get_analogy(self, dataset):
    self.dataset_path = dataset
    analogy = LRCos()
    self.result = analogy.get_result(self.embeddings, dataset)
    save_json(self.result, "./res.json")
    return self.result
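# Usage sketch (assumes the enclosing object loaded self.embeddings earlier,
# e.g. via load_from_dir; names are illustrative):
#
#   runner.get_analogy("./datasets/google_analogy/")
#   # returns the LRCos results and also dumps them to ./res.json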