def _eval_roberta_model(label_est_samples, valid_samples) -> float:
    """Evaluate the RoBERTa model on ``valid_samples`` and return its F1.

    Label proportions are computed from ``label_est_samples`` with
    ``calculate_labelprops`` and handed to the validation dataset as
    ``source2labelprops``.

    Args:
        label_est_samples: samples used only to compute the label
            proportions.
        valid_samples: samples the model is evaluated on.

    Returns:
        The ``"f1"`` entry of the metrics returned by ``valid_epoch``.

    Note:
        ``model`` is not a parameter; it is resolved from the enclosing
        scope at call time.
    """
    n_classes = _DATADEF.n_classes
    domain_names = _DATADEF.domain_names

    labelprops_from_estimate = calculate_labelprops(
        label_est_samples, n_classes, domain_names
    )

    valid_dataset = RobertaDataset(
        valid_samples,
        n_classes,
        domain_names,
        source2labelprops=labelprops_from_estimate,
    )
    # Evaluation only: fixed order, no shuffling.
    loader = DataLoader(
        valid_dataset,
        batch_size=100,
        shuffle=False,
        num_workers=1,
    )

    return valid_epoch(model, loader)["f1"]
def _eval_lex_model(label_est_samples, valid_samples) -> float:
    """Evaluate the lexicon model on ``valid_samples`` and return its F1.

    Label proportions are computed from ``label_est_samples`` and stored
    under the ``"estimated"`` key. The data definition obtained from
    ``get_datadef(_DATASET_NAME)`` then has its ``load_labelprops_func``
    replaced so that it serves those proportions.

    Args:
        label_est_samples: samples used only to compute the label
            proportions.
        valid_samples: samples the model is evaluated on.

    Returns:
        The ``"valid_f1"`` entry of the metrics returned by
        ``eval_lexicon_model``.

    Note:
        ``model`` and ``vocab`` are not parameters; they are resolved from
        the enclosing scope at call time.
    """
    estimated_labelprops = dict(
        estimated=calculate_labelprops(
            label_est_samples,
            _DATADEF.n_classes,
            _DATADEF.domain_names,
        )
    )

    def _lookup_estimated(_split):
        # Only the "estimated" key exists; any other split raises KeyError.
        return estimated_labelprops[_split]

    datadef = get_datadef(_DATASET_NAME)
    # NOTE(review): presumably get_datadef returns an object that is safe to
    # patch here without affecting other users -- confirm.
    datadef.load_labelprops_func = _lookup_estimated

    individual_norm = _LEXICON_CONFIG["use_source_individual_norm"]
    metrics = eval_lexicon_model(
        model,
        datadef,
        valid_samples,
        vocab,
        use_source_individual_norm=individual_norm,
        labelprop_split="estimated",  # match _load_labelprops_func()
    )
    return metrics["valid_f1"]
from experiments.datadef.definitions.framing import ( _LABELPROPS_DIR, ISSUES, PRIMARY_FRAME_NAMES, PRIMARY_TONE_NAMES, load_all_framing_samples, ) from modapt.dataset.common import calculate_labelprops from modapt.utils import save_json makedirs(_LABELPROPS_DIR, exist_ok=True) # primary frame for split in ["train", "test"]: samples = load_all_framing_samples(ISSUES, split, "primary_frame") source2labelprops = calculate_labelprops(samples, len(PRIMARY_FRAME_NAMES), ISSUES) save_json( { issue: labelprops.tolist() for issue, labelprops in source2labelprops.items() }, join(_LABELPROPS_DIR, f"primary_frame.{split}.json"), ) # primary tone for split in ["train", "test"]: samples = load_all_framing_samples(ISSUES, split, "primary_tone") source2labelprops = calculate_labelprops(samples, len(PRIMARY_TONE_NAMES), ISSUES) save_json( {
# Compute label proportions for every Amazon review split and write each
# result to LABELPROPS_DIR as "<split>.json".
from os import makedirs
from os.path import join

from experiments.datadef.definitions.amazon import (
    CATEGORIES,
    LABELPROPS_DIR,
    RATING_N_CLASSES,
)
from modapt.dataset.amazon.samples import load_all_amazon_review_samples
from modapt.dataset.common import calculate_labelprops
from modapt.utils import save_json

# The output directory may already exist from a previous run.
makedirs(LABELPROPS_DIR, exist_ok=True)

for split in ("train", "valid", "test"):
    samples = load_all_amazon_review_samples(CATEGORIES, split)
    source2labelprops = calculate_labelprops(samples, RATING_N_CLASSES, CATEGORIES)

    # Values are converted with .tolist() before saving (presumably arrays
    # that save_json cannot serialize directly -- confirm).
    serializable_props = {
        category: props.tolist() for category, props in source2labelprops.items()
    }
    output_path = join(LABELPROPS_DIR, f"{split}.json")
    save_json(serializable_props, output_path)
# Compute label proportions for every sentiment split and write each result
# to _LABELPROPS_DIR as "<split>.json".
from os import makedirs
from os.path import join

from experiments.datadef.definitions.sentiment import (
    _LABELPROPS_DIR,
    POLARITY_NAMES,
    SENTIMENT_SOURCES,
    load_sentiment_samples,
)
from modapt.dataset.common import calculate_labelprops
from modapt.utils import save_json

# The output directory may already exist from a previous run.
makedirs(_LABELPROPS_DIR, exist_ok=True)

for split in ("train", "valid", "test"):
    samples = load_sentiment_samples(SENTIMENT_SOURCES, split)
    # The number of classes is the number of polarity names.
    source2labelprops = calculate_labelprops(
        samples, len(POLARITY_NAMES), SENTIMENT_SOURCES
    )

    # Values are converted with .tolist() before saving (presumably arrays
    # that save_json cannot serialize directly -- confirm).
    serializable_props = {
        source: props.tolist() for source, props in source2labelprops.items()
    }
    output_path = join(_LABELPROPS_DIR, f"{split}.json")
    save_json(serializable_props, output_path)
# Compute label proportions for every arXiv abstract split and write each
# result to LABELPROPS_DIR as "<split>.json".
from os import makedirs
from os.path import join

from modapt.dataset.arxiv.definition import (
    ARXIV_CATEGORIES,
    LABELPROPS_DIR,
    YEARRANGE_N_CLASSES,
)
from modapt.dataset.arxiv.samples import (
    load_all_arxiv_abstract_samples,
)
from modapt.dataset.common import calculate_labelprops
from modapt.utils import save_json

# The output directory may already exist from a previous run.
makedirs(LABELPROPS_DIR, exist_ok=True)

for split in ("train", "valid", "test"):
    samples = load_all_arxiv_abstract_samples(ARXIV_CATEGORIES, split)
    source2labelprops = calculate_labelprops(
        samples, YEARRANGE_N_CLASSES, ARXIV_CATEGORIES
    )

    # Values are converted with .tolist() before saving (presumably arrays
    # that save_json cannot serialize directly -- confirm).
    serializable_props = {
        category: props.tolist() for category, props in source2labelprops.items()
    }
    output_path = join(LABELPROPS_DIR, f"{split}.json")
    save_json(serializable_props, output_path)