Exemple #1
0
 def _eval_roberta_model(label_est_samples, valid_samples) -> float:
     """Score the RoBERTa model on ``valid_samples`` with estimated label proportions.

     The label proportions are computed from ``label_est_samples`` via
     ``calculate_labelprops`` and handed to ``RobertaDataset`` as
     ``source2labelprops``. ``model`` is not a parameter; it is read from the
     surrounding scope.

     Returns:
         The ``"f1"`` entry of the metrics returned by ``valid_epoch``.
     """
     n_classes = _DATADEF.n_classes
     domain_names = _DATADEF.domain_names

     # Label proportions come from the estimation samples, not the validation set.
     labelprops = calculate_labelprops(label_est_samples, n_classes, domain_names)

     eval_dataset = RobertaDataset(
         valid_samples,
         n_classes,
         domain_names,
         source2labelprops=labelprops,
     )
     # Fixed order (no shuffling), batches of 100, one loader worker.
     eval_loader = DataLoader(
         eval_dataset, batch_size=100, shuffle=False, num_workers=1
     )
     return valid_epoch(model, eval_loader)["f1"]
Exemple #2
0
 def _eval_lex_model(label_est_samples, valid_samples) -> float:
     """Score the lexicon model on ``valid_samples`` with estimated label proportions.

     Label proportions are computed from ``label_est_samples`` and exposed to
     ``eval_lexicon_model`` through the datadef's ``load_labelprops_func`` hook
     under the split name ``"estimated"``. ``model`` and ``vocab`` are not
     parameters; they are read from the surrounding scope.

     Returns:
         The ``"valid_f1"`` entry of the metrics returned by
         ``eval_lexicon_model``.
     """
     split_key = "estimated"
     labelprops_by_split = {
         split_key: calculate_labelprops(
             label_est_samples, _DATADEF.n_classes, _DATADEF.domain_names
         )
     }

     def _labelprops_for(_split):
         # Only the "estimated" split is available; any other key raises KeyError.
         return labelprops_by_split[_split]

     lexicon_datadef = get_datadef(_DATASET_NAME)
     # Replace the datadef's label-proportion loader with the estimated values.
     lexicon_datadef.load_labelprops_func = _labelprops_for

     results = eval_lexicon_model(
         model,
         lexicon_datadef,
         valid_samples,
         vocab,
         use_source_individual_norm=_LEXICON_CONFIG["use_source_individual_norm"],
         # Must be the key served by the loader installed above.
         labelprop_split=split_key,
     )
     return results["valid_f1"]
from experiments.datadef.definitions.framing import (
    _LABELPROPS_DIR,
    ISSUES,
    PRIMARY_FRAME_NAMES,
    PRIMARY_TONE_NAMES,
    load_all_framing_samples,
)
from modapt.dataset.common import calculate_labelprops
from modapt.utils import save_json

# Create the label-proportion output directory if it does not exist yet.
# NOTE(review): `makedirs` and `join` are not imported in the visible part of
# this script -- confirm the `os` / `os.path` imports exist above.
makedirs(_LABELPROPS_DIR, exist_ok=True)

# primary frame: write one JSON file of label proportions per split
for split in ("train", "test"):
    frame_samples = load_all_framing_samples(ISSUES, split, "primary_frame")
    props_by_issue = calculate_labelprops(
        frame_samples, len(PRIMARY_FRAME_NAMES), ISSUES
    )
    # .tolist() turns each value into a plain list before saving
    # (values are presumably numpy arrays -- TODO confirm).
    payload = {name: props.tolist() for name, props in props_by_issue.items()}
    out_path = join(_LABELPROPS_DIR, f"primary_frame.{split}.json")
    save_json(payload, out_path)

# primary tone
for split in ["train", "test"]:
    samples = load_all_framing_samples(ISSUES, split, "primary_tone")
    source2labelprops = calculate_labelprops(samples, len(PRIMARY_TONE_NAMES),
                                             ISSUES)
    save_json(
        {
from os import makedirs
from os.path import join

from experiments.datadef.definitions.amazon import (
    CATEGORIES,
    LABELPROPS_DIR,
    RATING_N_CLASSES,
)
from modapt.dataset.amazon.samples import load_all_amazon_review_samples
from modapt.dataset.common import calculate_labelprops
from modapt.utils import save_json

# Create the label-proportion output directory if it does not exist yet.
makedirs(LABELPROPS_DIR, exist_ok=True)

# Write one "<split>.json" file of label proportions for every split.
for split in ("train", "valid", "test"):
    review_samples = load_all_amazon_review_samples(CATEGORIES, split)
    props_by_category = calculate_labelprops(
        review_samples, RATING_N_CLASSES, CATEGORIES
    )
    # .tolist() turns each value into a plain list before saving
    # (values are presumably numpy arrays -- TODO confirm).
    payload = {
        category: props.tolist()
        for category, props in props_by_category.items()
    }
    out_path = join(LABELPROPS_DIR, f"{split}.json")
    save_json(payload, out_path)
Exemple #5
0
from os import makedirs
from os.path import join

from experiments.datadef.definitions.sentiment import (
    _LABELPROPS_DIR,
    POLARITY_NAMES,
    SENTIMENT_SOURCES,
    load_sentiment_samples,
)
from modapt.dataset.common import calculate_labelprops
from modapt.utils import save_json

# Create the label-proportion output directory if it does not exist yet.
makedirs(_LABELPROPS_DIR, exist_ok=True)

# Write one "<split>.json" file of label proportions for every split.
for split in ("train", "valid", "test"):
    sentiment_samples = load_sentiment_samples(SENTIMENT_SOURCES, split)
    props_by_source = calculate_labelprops(
        sentiment_samples, len(POLARITY_NAMES), SENTIMENT_SOURCES
    )
    # .tolist() turns each value into a plain list before saving
    # (values are presumably numpy arrays -- TODO confirm).
    payload = {
        source: props.tolist() for source, props in props_by_source.items()
    }
    out_path = join(_LABELPROPS_DIR, f"{split}.json")
    save_json(payload, out_path)
Exemple #6
0
from os import makedirs
from os.path import join

from modapt.dataset.arxiv.definition import (
    ARXIV_CATEGORIES,
    LABELPROPS_DIR,
    YEARRANGE_N_CLASSES,
)
from modapt.dataset.arxiv.samples import (
    load_all_arxiv_abstract_samples, )
from modapt.dataset.common import calculate_labelprops
from modapt.utils import save_json

# Create the label-proportion output directory if it does not exist yet.
makedirs(LABELPROPS_DIR, exist_ok=True)

# Write one "<split>.json" file of label proportions for every split.
for split in ("train", "valid", "test"):
    abstract_samples = load_all_arxiv_abstract_samples(ARXIV_CATEGORIES, split)
    props_by_category = calculate_labelprops(
        abstract_samples, YEARRANGE_N_CLASSES, ARXIV_CATEGORIES
    )
    # .tolist() turns each value into a plain list before saving
    # (values are presumably numpy arrays -- TODO confirm).
    payload = {
        category: props.tolist()
        for category, props in props_by_category.items()
    }
    out_path = join(LABELPROPS_DIR, f"{split}.json")
    save_json(payload, out_path)