]
SIMILARITIES = [
    ("cos", CosSimilarity, {"threshold": 0.3}),
]
DIRECTIONS = [
    ("edge", EdgeBased, {}),
    ("order", OrderBased, {}),
]
SCORERS = [
    ("add_f=0.0_b=1.0_s=1.0", AddScorer, {}),
]
SUMMARIZERS = [DefaultSummarizer(num_words=200)]

experiment_time = int(time.time())
results_path = Path("results/exp10")

for embedder_id, embedder, embedder_args in EMBEDDERS:
    Embedder = embedder(**embedder_args)
    # zip pairs each summarizer with the dataset at the same index.
    for Summarizer, (dataset_id, dataset, dataset_args) in zip(SUMMARIZERS, DATASETS):
        DataSet = dataset(**dataset_args)
        docs = list(DataSet)
        if DEBUG:
            docs = docs[:5]
        print(f"embedding dataset {dataset_id} with {embedder_id}")
        embeds = [Embedder.get_embeddings(doc) for doc in tqdm(docs)]
        for similarity_id, similarity, similarity_args in SIMILARITIES:
            Similarity = similarity(**similarity_args)
# {"model": "roberta-large-nli-mean-tokens"} # ), ] SIMILARITIES = [ ("cos", CosSimilarity, {}), ] DIRECTIONS = [ ("edge", EdgeBased, {}), ] SCORERS = [ ("add_f=0.0_b=1.0_s=1.0", AddScorer, {}), ] Summarizer = DefaultSummarizer(num_words=220) experiment_time = int(time.time()) # results_path = Path(f"results/{experiment_time}") results_path = Path(f"results/exp4") for embedder_id, embedder, embedder_args in EMBEDDERS: Embedder = embedder(**embedder_args) for dataset_id, dataset, dataset_args in DATASETS: DataSet = dataset(**dataset_args) docs = list(DataSet) if DEBUG: docs = docs[:5] print(f"embedding dataset {dataset_id} with {embedder_id}") embeds = [Embedder.get_embeddings(doc) for doc in tqdm(docs)] for similarity_id, similarity, similarity_args in SIMILARITIES:
# {"model": "roberta-large-nli-mean-tokens"} # ), ] SIMILARITIES = [ ("cos", CosSimilarity, {}), ] DIRECTIONS = [ ("order", OrderBased, {}), ] SCORERS = [ ("add_f=0.0_b=1.0_s=1.0", AddScorer, {}), ] SUMMARIZERS = [ DefaultSummarizer(num_words=200), DefaultSummarizer(num_words=220) ] experiment_time = int(time.time()) results_path = Path(f"results/exp8") for embedder_id, embedder, embedder_args in EMBEDDERS: Embedder = embedder(**embedder_args) for Summarizer, (dataset_id, dataset, dataset_args) in zip(SUMMARIZERS, DATASETS): DataSet = dataset(**dataset_args) docs = list(DataSet) if DEBUG: docs = docs[:5] print(f"embedding dataset {dataset_id} with {embedder_id}")

    }),
]
SIMILARITIES = [
    ("cos", CosSimilarity, {}),
]
DIRECTIONS = [
    ("edge", EdgeBased, {}),
]
SCORERS = [
    ("add_f=0.0_b=1.0_s=0.5", AddScorer, {"section_weight": 0.5}),
]
Summarizer = DefaultSummarizer()

experiment_time = int(time.time())
results_path = Path("results/exp3")

for embedder_id, embedder, embedder_args in EMBEDDERS:
    Embedder = embedder(**embedder_args)
    for dataset_id, dataset, dataset_args in DATASETS:
        DataSet = dataset(**dataset_args)
        docs = list(DataSet)
        if DEBUG:
            docs = docs[:5]
        print(f"embedding dataset {dataset_id} with {embedder_id}")
        embeds = [Embedder.get_embeddings(doc) for doc in tqdm(docs)]
        for similarity_id, similarity, similarity_args in SIMILARITIES:
            Similarity = similarity(**similarity_args)
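
Each of the four excerpts above builds the same kind of (identifier, class, kwargs) component lists and then sweeps over them with nested loops whose bodies are truncated here. As a minimal sketch only, assuming nothing beyond that tuple convention, the sweep could also be expressed once as a generator over all combinations; the helper name `iter_configurations` below is hypothetical and is not part of the original experiment scripts.

from itertools import product


def iter_configurations(embedders, datasets, similarities, directions, scorers):
    """Yield an identifier plus instantiated components for every combination."""
    for (emb_id, emb_cls, emb_args), (ds_id, ds_cls, ds_args), \
            (sim_id, sim_cls, sim_args), (dir_id, dir_cls, dir_args), \
            (sc_id, sc_cls, sc_args) in product(
                embedders, datasets, similarities, directions, scorers):
        config_id = f"{emb_id}_{ds_id}_{sim_id}_{dir_id}_{sc_id}"
        yield (
            config_id,
            emb_cls(**emb_args),   # embedder, e.g. a sentence-embedding wrapper
            ds_cls(**ds_args),     # dataset object, iterated to obtain docs
            sim_cls(**sim_args),   # e.g. CosSimilarity(threshold=0.3)
            dir_cls(**dir_args),   # EdgeBased or OrderBased
            sc_cls(**sc_args),     # e.g. AddScorer(section_weight=0.5)
        )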