Esempio n. 1
0
def _str_to_bool(value):
    """Parse a command-line token into a boolean.

    Booleans are returned unchanged. Strings are matched case-insensitively
    against 1/true/t/yes/y/on and 0/false/f/no/n/off.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    if token in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value))


def main():
    """Command-line entry point for the language modeling benchmark.

    Parses the command line, loads embeddings from the ``embeddings``
    directory, runs ``Language_modeling.get_result`` on them and then either
    prints the results as JSON (no ``--path_out``) or saves them:

    * ``--path_out`` is an existing directory or ends with ``/``:
      ``<path_out>/language_modeling/results.json``
    * otherwise ``--path_out`` is used as the output file name itself.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("embeddings")
    parser.add_argument("--window_size", default=5, type=int)
    # Without an explicit converter every value given on the command line
    # (including "False") is a non-empty, hence truthy, string, which made it
    # impossible to switch the test mode off.
    parser.add_argument("--test",
                        default=True,
                        type=_str_to_bool,
                        help='use small test dataset')
    parser.add_argument("--method",
                        default='lstm',
                        choices=['lr', '2FFNN', 'lstm'],
                        help='name of method')
    parser.add_argument("--path_out",
                        default=False,
                        help="destination folder to save results")
    args = parser.parse_args()
    embeddings = load_from_dir(args.embeddings)
    language_modeling = Language_modeling(window_size=args.window_size,
                                          method=args.method,
                                          test=args.test)
    results = language_modeling.get_result(embeddings)
    if not args.path_out:
        print_json(results)
        return
    if os.path.isdir(args.path_out) or args.path_out.endswith("/"):
        name_file_out = os.path.join(args.path_out, "language_modeling",
                                     "results.json")
        save_json(results, name_file_out)
    else:
        save_json(results, args.path_out)
Esempio n. 2
0
 def save_to_dir(self, path):
     """Write this object's vocabulary, matrix and metadata under ``path``.

     The directory is created if it does not exist yet. The vocabulary is
     saved through its own ``save_to_dir``, the matrix through
     ``save_matr_to_hdf5`` and the metadata as ``metadata.json``.
     """
     os.makedirs(path, exist_ok=True)
     self.vocabulary.save_to_dir(path)
     self.save_matr_to_hdf5(path)
     metadata_file = os.path.join(path, "metadata.json")
     save_json(self.metadata, metadata_file)
Esempio n. 3
0
def main():
    """Command-line entry point for running an analogy benchmark.

    Loads embeddings from the ``embeddings`` directory, obtains a benchmark
    object from ``select_method(--method)`` and calls its ``get_result`` with
    the embeddings and the ``dataset`` argument.

    Output:
      * no ``--path_out``: results are printed as JSON;
      * ``--path_out`` is an existing directory or ends with ``/``:
        ``<path_out>/<dataset basename>/<method>/results.json``;
      * otherwise ``--path_out`` is used as the output file name.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("embeddings")
    cli.add_argument("dataset")
    cli.add_argument("--method", default="LRCos",
                     help="analogy solving method")
    cli.add_argument("--path_out",
                     help="destination folder to save results")
    opts = cli.parse_args()

    embeddings = load_from_dir(opts.embeddings)
    benchmark = select_method(opts.method)
    results = benchmark.get_result(embeddings, opts.dataset)

    target = opts.path_out
    if not target:
        print_json(results)
        return

    looks_like_dir = os.path.isdir(target) or target.endswith("/")
    if looks_like_dir:
        dataset_name = os.path.basename(os.path.normpath(opts.dataset))
        target = os.path.join(target, dataset_name, opts.method,
                              "results.json")
    save_json(results, target)
Esempio n. 4
0
def main():
    """Command-line entry point for the text classification benchmark.

    Builds a ``Text_classification`` object from the training options given
    on the command line, runs ``get_result`` on the loaded embeddings and the
    ``dataset`` argument, then prints or saves the results.

    Output:
      * no ``--path_out``: results are printed as JSON;
      * ``--path_out`` is an existing directory or ends with ``/``:
        ``<path_out>/<dataset basename>/results.json``;
      * otherwise ``--path_out`` is used as the output file name.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("embeddings")
    cli.add_argument("dataset")
    cli.add_argument('--batchsize', '-b', type=int, default=64,
                     help='Number of images in each mini-batch')
    cli.add_argument('--epoch', '-e', type=int, default=30,
                     help='Number of sweeps over the dataset to train')
    cli.add_argument('--gpu', '-g', type=int, default=-1,
                     help='GPU ID (negative value indicates CPU)')
    cli.add_argument('--layer', '-l', type=int, default=1,
                     help='Number of layers of RNN or MLP following CNN')
    cli.add_argument('--dropout', '-d', type=float, default=0.4,
                     help='Dropout rate')
    cli.add_argument('--model', '-model', default='cnn',
                     choices=['cnn', 'rnn', 'bow'],
                     help='Name of encoder model type')
    cli.add_argument("--path_out", default=False,
                     help="destination folder to save results")
    opts = cli.parse_args()

    embeddings = load_from_dir(opts.embeddings)
    classifier = Text_classification(
        batchsize=opts.batchsize,
        epoch=opts.epoch,
        gpu=opts.gpu,
        layer=opts.layer,
        dropout=opts.dropout,
        model=opts.model,
    )
    results = classifier.get_result(embeddings, opts.dataset)

    target = opts.path_out
    if not target:
        print_json(results)
        return

    if os.path.isdir(target) or target.endswith("/"):
        dataset_name = os.path.basename(os.path.normpath(opts.dataset))
        target = os.path.join(target, dataset_name, "results.json")
    save_json(results, target)
Esempio n. 5
0
def save_results(results, path_out, dataset_name):
    """Save ``results`` as JSON in a task/dataset/timestamp directory tree.

    The file is written to
    ``<path_out>/<task>/<dataset_name>/<timestamp>/results.json``, where
    ``task`` is read from ``["experiment_setup"]["task"]`` of ``results``
    (or of its first element when ``results`` is a list) with spaces
    replaced by underscores, and ``timestamp`` comes from ``get_time_str()``.
    """
    # create subdirs unless explicitly asked to not do so
    # TODO: add submodules to append to path
    stamp = get_time_str()
    head = results[0] if isinstance(results, list) else results
    task_dir = head["experiment_setup"]["task"].replace(" ", "_")
    destination = os.path.join(path_out, task_dir, dataset_name, stamp,
                               "results.json")
    save_json(results, destination)
Esempio n. 6
0
def run_benchmark_by_name(name, args):
    """Import ``vecto.benchmarks.<name>`` and run its ``Benchmark``.

    The benchmark module contributes its own command-line options through
    ``add_extra_args``; ``--path_out`` is added here. After parsing ``args``,
    the ``embeddings``, ``dataset`` and ``path_out`` entries are removed and
    the remaining options are passed to ``Benchmark`` as keyword arguments.

    Output:
      * no ``--path_out``: results are printed as JSON;
      * ``--path_out`` is an existing directory or ends with ``/``:
        ``<path_out>/<task>/<dataset name>/<timestamp>/results.json``;
      * otherwise ``--path_out`` is used as the output file name.
    """
    print(name, args)
    print("running ", name)
    module = importlib.import_module("vecto.benchmarks." + name)

    cli = argparse.ArgumentParser()
    module.add_extra_args(cli)
    cli.add_argument("--path_out", default=None,
                     help="destination folder to save results")
    parsed = cli.parse_args(args)
    options = vars(parsed)

    embeddings = load_from_dir(parsed.embeddings)
    # TODO: this is ugly hack, do subparsers or something
    if name != "language_modeling":
        dataset = Dataset(parsed.dataset)
        options.pop("dataset")
    else:
        dataset = Dataset("/tmp/")
        dataset.name = "ptb"

    options.pop("embeddings")
    # TODO: not sure if all banchmarks use dataset arg
    path_out = options.pop("path_out")
    benchmark = module.Benchmark(**options)

    print("SHAPE:", embeddings.matrix.shape)
    print("vocab size:", embeddings.vocabulary.cnt_words)
    results = benchmark.run(embeddings, dataset)

    if not path_out:
        print_json(results)
        return

    if not (os.path.isdir(path_out) or path_out.endswith("/")):
        # path_out names the output file itself
        save_json(results, path_out)
        return

    dataset_name = dataset.metadata["name"]
    stamp = get_time_str()
    head = results[0] if isinstance(results, list) else results
    task = head["experiment_setup"]["task"]
    save_json(results,
              os.path.join(path_out, task, dataset_name, stamp,
                           "results.json"))
Esempio n. 7
0
 def run_with_args(self, args):
     """Run this benchmark as described by parsed arguments and emit results.

     Args:
         args: object with ``embeddings``, ``dataset`` and ``path_out``
             attributes (for example an ``argparse.Namespace``).

     Output:
       * ``args.path_out`` is falsy: results are printed as JSON;
       * ``args.path_out`` is an existing directory or ends with ``/``:
         ``<path_out>/<task>/<dataset basename>/<timestamp>/results.json``;
       * otherwise ``args.path_out`` is used as the output file name.
     """
     embeddings = load_from_dir(args.embeddings)
     print("SHAPE:", embeddings.matrix.shape)
     results = self.get_result(embeddings, args.dataset)
     if not args.path_out:
         print_json(results)
         return
     if os.path.isdir(args.path_out) or args.path_out.endswith("/"):
         dataset = os.path.basename(os.path.normpath(args.dataset))
         timestamp = get_time_str()
         # Results may be a single dict or a sequence of dicts; indexing a
         # dict with [0] used to fail with KeyError.
         first = results if isinstance(results, dict) else results[0]
         task = first["experiment_setup"]["task"]
         name_file_out = os.path.join(args.path_out,
                                      task,
                                      dataset,
                                      timestamp,
                                      "results.json")
         save_json(results, name_file_out)
     else:
         save_json(results, args.path_out)
Esempio n. 8
0
def main():
    """Command-line entry point for running an outlier detection benchmark.

    Loads embeddings from the ``embeddings`` directory, obtains a benchmark
    object from ``select_method(--method)`` and calls its ``get_result`` with
    the embeddings and the ``dataset`` argument.

    Output:
      * no ``--path_out``: results are printed as JSON;
      * ``--path_out`` is an existing directory or ends with ``/``:
        ``<path_out>/<dataset basename>/<method>/results.json``;
      * otherwise ``--path_out`` is used as the output file name.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('embeddings')
    cli.add_argument('dataset')
    cli.add_argument('--method',
                     default='AveragePairwiseCosine',
                     help='Outlier detection method')
    cli.add_argument('--path_out',
                     help='Destination folder to save the results')
    opts = cli.parse_args()

    embeddings = load_from_dir(opts.embeddings)
    benchmark = select_method(opts.method)
    results = benchmark.get_result(embeddings, opts.dataset)

    target = opts.path_out
    if not target:
        print_json(results)
        return

    if path.isdir(target) or target.endswith('/'):
        dataset_name = path.basename(path.normpath(opts.dataset))
        target = path.join(target, dataset_name, opts.method, 'results.json')
    save_json(results, target)
Esempio n. 9
0
    def _init_metadata(self, embeddings):
        """Reset ``self.metadata`` and collect per-embedding metadata.

        The base record holds an empty ``timestamp`` dict, the ldt version
        string and the class tag ``"experiment"``. When the instance has an
        ``embeddings`` attribute, each entry of the ``embeddings`` argument
        is joined with ``metadata.json``; an existing file is loaded and its
        ``path`` set to the entry, otherwise a stub is created with
        ``create_metadata_stub`` and saved to that file.

        Args:
            embeddings: iterable of embedding locations (presumably
                directory paths, since each is joined with
                ``metadata.json``; confirm against callers).
        """
        self.metadata = {
            "timestamp": {},
            "version": "ldt v. " + __version__,
            "class": "experiment",
        }
        if not hasattr(self, "embeddings"):
            return

        collected = []
        self.metadata["embeddings"] = collected
        shared_subpath = check_shared_subpath(embeddings, "")
        for location in embeddings:
            meta_file = os.path.join(location, "metadata.json")
            if not os.path.isfile(meta_file):
                # no metadata on disk yet: build a stub and persist it
                entry = create_metadata_stub(location, shared_subpath)
                save_json(entry, meta_file)
            else:
                entry = load_json(meta_file)
                entry["path"] = location
            collected.append(entry)
Esempio n. 10
0
def main():
    """Command-line entry point for the relation extraction benchmark.

    Builds a ``Relation_extraction`` object from the options given on the
    command line, runs ``get_result`` on the loaded embeddings and the
    ``dataset`` argument, then prints or saves the results.

    Output:
      * no ``--path_out``: results are printed as JSON;
      * ``--path_out`` is an existing directory or ends with ``/``:
        ``<path_out>/<dataset basename>/results.json``;
      * otherwise ``--path_out`` is used as the output file name.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("embeddings")
    cli.add_argument("dataset")
    cli.add_argument('--batchsize', '-b',
                     type=int,
                     default=64,
                     help='Number of images in each mini-batch')
    cli.add_argument('--epoch', '-e',
                     type=int,
                     default=1,
                     help='Number of sweeps over the dataset to train')
    cli.add_argument('--nb_filter', '-nf',
                     type=int,
                     default=100,
                     help='filter number')
    cli.add_argument('--filter_length', '-fl',
                     type=int,
                     default=3,
                     help='filter length')
    cli.add_argument('--hidden_dims', '-hd',
                     type=int,
                     default=100,
                     help='D')
    cli.add_argument('--position_dims', '-pd',
                     type=int,
                     default=100,
                     help='D')
    cli.add_argument("--path_out",
                     default=False,
                     help="destination folder to save results")
    opts = cli.parse_args()

    embeddings = load_from_dir(opts.embeddings)
    extractor = Relation_extraction(
        batchsize=opts.batchsize,
        epoch=opts.epoch,
        nb_filter=opts.nb_filter,
        filter_length=opts.filter_length,
        hidden_dims=opts.hidden_dims,
        position_dims=opts.position_dims,
    )
    results = extractor.get_result(embeddings, opts.dataset)

    target = opts.path_out
    if not target:
        print_json(results)
        return

    if os.path.isdir(target) or target.endswith("/"):
        dataset_name = os.path.basename(os.path.normpath(opts.dataset))
        target = os.path.join(target, dataset_name, "results.json")
    save_json(results, target)
Esempio n. 11
0
def main():
    """Command-line entry point for the similarity benchmark.

    Loads embeddings from the ``embeddings`` directory and calls
    ``Similarity().get_result`` with them and the ``dataset`` argument.

    Output:
      * no ``--path_out``: results are printed as JSON;
      * ``--path_out`` is an existing directory or ends with ``/``:
        ``<path_out>/<dataset basename>/results.json``;
      * otherwise ``--path_out`` is used as the output file name.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("embeddings")
    cli.add_argument("dataset")
    cli.add_argument("--path_out", default=False,
                     help="destination folder to save results")
    opts = cli.parse_args()

    embeddings = load_from_dir(opts.embeddings)
    results = Similarity().get_result(embeddings, opts.dataset)

    target = opts.path_out
    if not target:
        print_json(results)
        return

    if os.path.isdir(target) or target.endswith("/"):
        dataset_name = os.path.basename(os.path.normpath(opts.dataset))
        target = os.path.join(target, dataset_name, "results.json")
    save_json(results, target)
Esempio n. 12
0
def run(options, extra_args):
    """Run a categorization benchmark from a list of command-line arguments.

    Args:
        options: not used by this function.
        extra_args: argument list handed to ``argparse`` (positional
            ``embeddings`` and ``dataset``, optional ``--method`` and
            ``--path_out``).

    Output:
      * no ``--path_out``: results are printed as JSON;
      * ``--path_out`` is an existing directory or ends with ``/``:
        ``<path_out>/<dataset basename>/<method>/<timestamp>/results.json``;
      * otherwise ``--path_out`` is used as the output file name.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('embeddings')
    cli.add_argument('dataset')
    cli.add_argument('--method', default='KMeansCategorization',
                     help='Categorization method')
    cli.add_argument('--path_out',
                     help='Destination folder to save the results')
    opts = cli.parse_args(extra_args)

    embeddings = load_from_dir(opts.embeddings)
    benchmark = select_method(opts.method)
    results = benchmark.get_result(embeddings, opts.dataset)

    target = opts.path_out
    if not target:
        print_json(results)
        return

    if path.isdir(target) or target.endswith('/'):
        dataset_name = path.basename(path.normpath(opts.dataset))
        stamp = get_time_str()
        target = path.join(target, dataset_name, opts.method, stamp,
                           'results.json')
    save_json(results, target)
Esempio n. 13
0
 def get_analogy(self, dataset, path_out="./res.json"):
     """Run the LRCos analogy method on ``dataset`` and save the result.

     The dataset argument is remembered in ``self.dataset_path`` and the
     result in ``self.result``.

     Args:
         dataset: dataset argument forwarded to ``LRCos().get_result``.
         path_out: file the result is saved to as JSON. Defaults to the
             previously hard-coded ``./res.json`` in the current working
             directory, so existing callers behave as before.

     Returns:
         The value returned by ``LRCos().get_result``.
     """
     self.dataset_path = dataset
     analogy = LRCos()
     self.result = analogy.get_result(self.embeddings, dataset)
     save_json(self.result, path_out)
     return self.result