METRICS = ['mrr']

MODELS = [
    # Only item sets
    # Countbased(),
    # SVDRecommender(1000, use_title=False),
    # AAERecommender(adversarial=True, use_title=False, n_epochs=55, embedding=VECTORS),
    # AAERecommender(adversarial=False, n_epochs=1),
    # VAERecommender(conditions=None, n_epochs=55, batch_size=1000),
    # DAERecommender(conditions=None, n_epochs=55, batch_size=1000),
    # Title-enhanced
    # SVDRecommender(1000, use_title=True),
    # AAERecommender(adversarial=True, use_side_info=True, n_epochs=55, embedding=VECTORS),
    # AAERecommender(adversarial=False, use_side_info=["name"], n_epochs=5, embedding=VECTORS),
    # DecodingRecommender(n_epochs=55, embedding=VECTORS),
    VAERecommender(conditions=CONDITIONS, n_epochs=55, batch_size=1000),
    DAERecommender(conditions=CONDITIONS, n_epochs=55, batch_size=1000),
    # Generic conditions on all metadata
    # AAERecommender(adversarial=False, conditions=CONDITIONS, n_epochs=55),
    # AAERecommender(adversarial=True, conditions=CONDITIONS, n_epochs=55),
    # DecodingRecommender(conditions=CONDITIONS, n_epochs=55),
    # Put more here...
]


def load(path):
    """ Loads the playlists of a single dataset slice """
    with open(path, 'r') as fhandle:
        obj = json.load(fhandle)
    return obj["playlists"]
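
# `load` reads one slice at a time; a full run aggregates many slices.
# A minimal usage sketch, assuming the standard Million Playlist Dataset
# layout of mpd.slice.*.json files -- the glob pattern and the `load_all`
# helper are illustrative, not part of this script:
import glob
import itertools
import os

def load_all(directory):
    """ Loads and concatenates the playlists of every slice in `directory` """
    paths = sorted(glob.glob(os.path.join(directory, "mpd.slice.*.json")))
    # Chain the per-slice playlist lists into one flat list
    return list(itertools.chain.from_iterable(load(p) for p in paths))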
    'n_hidden': 100,
    'normalize_inputs': True,
}

# Models without metadata
BASELINES = [
    # RandomBaseline(),
    # MostPopular(),
    Countbased(),
    SVDRecommender(1000, use_title=False),
]

RECOMMENDERS = [
    AAERecommender(adversarial=False, lr=0.001, **ae_params),
    AAERecommender(prior='gauss', gen_lr=0.001, reg_lr=0.001, **ae_params),
    VAERecommender(conditions=None, **vae_params),
    DAERecommender(conditions=None, **ae_params),
]

# Metadata to use
CONDITIONS = ConditionList([
    ('title', PretrainedWordEmbeddingCondition(VECTORS)),
    # ('author', CategoricalCondition(embedding_dim=32, reduce="sum",
    #                                 sparse=True, embedding_on_gpu=True)),
])

# Models with metadata (metadata used as set in CONDITIONS above)
CONDITIONED_MODELS = [
    # TODO: SVD can use only titles, not generic conditions
    SVDRecommender(1000, use_title=True),
    AAERecommender(adversarial=False, conditions=CONDITIONS,
                   lr=0.001, **ae_params),
]
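
# A minimal sketch (assumption): combining the groups above into one list for
# evaluation, mirroring the ALL_MODELS pattern of the DBLP script below.
ALL_MODELS = BASELINES + RECOMMENDERS + CONDITIONED_MODELS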
def main(year, dataset, min_count=None, outfile=None, drop=1,
         baselines=False, autoencoders=False, conditioned_autoencoders=False,
         all_metadata=True):
    """ Main function for training and evaluating AAE methods on DBLP data """
    assert baselines or autoencoders or conditioned_autoencoders, \
        "Please specify what to run"

    if all_metadata:
        # V2 - all metadata
        CONDITIONS = ConditionList([
            ('title', PretrainedWordEmbeddingCondition(VECTORS)),
            ('venue', PretrainedWordEmbeddingCondition(VECTORS)),
            ('author', CategoricalCondition(embedding_dim=32, reduce="sum",
                                            # vocab_size=0.01,
                                            sparse=False, embedding_on_gpu=True)),
        ])
    else:
        # V1 - only title metadata
        CONDITIONS = ConditionList([
            ('title', PretrainedWordEmbeddingCondition(VECTORS)),
        ])
    #### CONDITIONS defined

    ALL_MODELS = []

    if baselines:
        # Models without metadata
        BASELINES = [
            # RandomBaseline(),
            # MostPopular(),
            Countbased(),
            SVDRecommender(1000, use_title=False),
        ]
        ALL_MODELS += BASELINES
        if not all_metadata:
            # SVD can use only titles, not generic conditions
            ALL_MODELS += [SVDRecommender(1000, use_title=True)]

    if autoencoders:
        AUTOENCODERS = [
            AAERecommender(adversarial=False, conditions=None,
                           lr=0.001, **AE_PARAMS),
            AAERecommender(adversarial=True, conditions=None,
                           gen_lr=0.001, reg_lr=0.001, **AE_PARAMS),
            VAERecommender(conditions=None, **AE_PARAMS),
            DAERecommender(conditions=None, **AE_PARAMS),
        ]
        ALL_MODELS += AUTOENCODERS

    if conditioned_autoencoders:
        # Models with metadata (metadata used as set in CONDITIONS above)
        CONDITIONED_AUTOENCODERS = [
            AAERecommender(adversarial=False, conditions=CONDITIONS,
                           lr=0.001, **AE_PARAMS),
            AAERecommender(adversarial=True, conditions=CONDITIONS,
                           gen_lr=0.001, reg_lr=0.001, **AE_PARAMS),
            DecodingRecommender(CONDITIONS, n_epochs=100, batch_size=1000,
                                optimizer='adam', n_hidden=100, lr=0.001,
                                verbose=True),
            VAERecommender(conditions=CONDITIONS, **AE_PARAMS),
            DAERecommender(conditions=CONDITIONS, **AE_PARAMS),
        ]
        ALL_MODELS += CONDITIONED_AUTOENCODERS

    print("Finished preparing models:", *ALL_MODELS, sep='\n\t')

    path = DATA_PATH + ("dblp-ref/" if dataset == "dblp" else "acm.txt")
    print("Loading data from", path)
    papers = papers_from_files(path, dataset, n_jobs=4)
    print("Unpacking {} data...".format(dataset))
    bags_of_papers, ids, side_info = unpack_papers(papers)
    del papers
    bags = Bags(bags_of_papers, ids, side_info)

    if args.compute_mi:
        from aaerec.utils import compute_mutual_info
        print("[MI] Dataset:", dataset)
        print("[MI] min Count:", min_count)
        tmp = bags.build_vocab(min_count=min_count, max_features=None)
        mi = compute_mutual_info(tmp, conditions=None,
                                 include_labels=True, normalize=True)
        with open('mi.csv', 'a') as mifile:
            print(dataset, min_count, mi, sep=',', file=mifile)
        print("=" * 78)
        exit(0)

    log("Whole dataset:", logfile=outfile)
    log(bags, logfile=outfile)

    evaluation = Evaluation(bags, year, logfile=outfile)
    evaluation.setup(min_count=min_count, min_elements=2, drop=drop)
    with open(outfile, 'a') as fh:
        print("~ Partial List + Titles + Author + Venue", "~" * 42, file=fh)
    evaluation(ALL_MODELS, batch_size=1000)
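
# A hedged sketch (assumption): a CLI entry point wiring up main(). The flag
# names below are inferred from main()'s signature and from the module-level
# `args.compute_mi` reference above; the real script may parse more options.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('year', type=int, help='Year for the train/test split')
    parser.add_argument('dataset', choices=['dblp', 'acm'])
    parser.add_argument('-m', '--min-count', type=int, default=None)
    parser.add_argument('-o', '--outfile', default=None)
    parser.add_argument('--drop', type=int, default=1)
    parser.add_argument('--baselines', action='store_true')
    parser.add_argument('--autoencoders', action='store_true')
    parser.add_argument('--conditioned-autoencoders', action='store_true')
    parser.add_argument('--all-metadata', action='store_true')
    parser.add_argument('--compute-mi', action='store_true')
    args = parser.parse_args()
    main(args.year, args.dataset, min_count=args.min_count, outfile=args.outfile,
         drop=args.drop, baselines=args.baselines, autoencoders=args.autoencoders,
         conditioned_autoencoders=args.conditioned_autoencoders,
         all_metadata=args.all_metadata)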