def main(_):
    set_seed(FLAGS.seed, set_tf_seed=FLAGS.debug)
    logs_dir = Path(FLAGS.logs_dir)
    setup_logger(FLAGS.print_logs, FLAGS.save_logs, logs_dir, FLAGS.run_id)
    tf.config.experimental_run_functions_eagerly(FLAGS.debug)
    logging.info(f"Flags/config of this run:\n{get_flags_dict(FLAGS)}")

    gpus = tf.config.experimental.list_physical_devices('GPU')
    logging.info(f"Num GPUs Available: {len(gpus)}")
    if len(gpus) > 1:
        try:
            # Restrict TensorFlow to only use the GPU selected by FLAGS.gpu_index
            logging.info(f"Setting GPU Index {FLAGS.gpu_index} only")
            tf.config.experimental.set_visible_devices(gpus[FLAGS.gpu_index], 'GPU')
        except RuntimeError as e:
            # Visible devices must be set before GPUs have been initialized
            logging.info(e)

    # load data
    train, dev, test, samples, n_relations, train_len, data = load_data(FLAGS)
    n_users, n_items, n_entities = get_quantities(data)

    model = get_model(n_entities, n_relations, data["id2iid"])
    optimizer = get_optimizer(FLAGS)
    loss_fn = getattr(losses, FLAGS.loss_fn)(ini_neg_index=0,
                                             end_neg_index=n_entities - 1,
                                             args=FLAGS)
    logging.info(f"Train split size: {train_len}, relations: {n_relations}")

    runner = Runner(FLAGS, model, optimizer, loss=loss_fn, train=train, dev=dev,
                    test=test, samples=samples, id2uid=data["id2uid"],
                    id2iid=data["id2iid"], iid2name=data["iid2name"])
    runner.run()
    logging.info("Done!")
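# Hedged sketch, not part of the original listing: the `main(_)` signature and
# module-level FLAGS above suggest an absl-style script. If so, the standard
# entry point would look like this (`absl.app.run` is real absl API; the rest
# of the wiring is an assumption).
if __name__ == "__main__":
    from absl import app
    app.run(main)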
def setUp(self):
    super().setUp()
    set_seed(42, set_tf_seed=True)
    self.dtype = tf.float64
    tf.keras.backend.set_floatx("float64")
    self.flags = get_flags()
    self.n_users = 2
    self.n_items = 2
    self.n_relations = 1
    self.item_ids = [0, 1]
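# Hypothetical companion test, illustrative only: it exercises just the
# fixture above (seed, float64 policy, toy sizes), so it needs no model code.
# The method name and assertions are not from the original source.
def test_fixture_defaults(self):
    self.assertEqual(tf.keras.backend.floatx(), "float64")
    self.assertEqual(len(self.item_ids), self.n_items)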
def main(_):
    setup_logger(print_logs=True, save_logs=False, save_path="", run_id="")
    set_seed(FLAGS.seed, set_tf_seed=False)

    triplets, all_rels = load_data(FLAGS)
    graph = build_graph(triplets)
    logging.info(nx.info(graph))

    curvatures = seccurv(graph,
                         sample_ratio=FLAGS.sample_ratio,
                         max_neigh_pairs=FLAGS.max_neigh_pairs)

    out_file = (f"outseccurv-{FLAGS.prep_name.split('-')[0]}-"
                f"{'all' if all_rels else 'no'}rel")
    np.save(out_file, curvatures)
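# Hedged helper sketch (not in the original source): `np.save` appends ".npy"
# to the given name, so the curvature array saved above can be reloaded like
# this, assuming the same flag values used when the script ran.
def load_curvatures(out_file):
    return np.load(out_file + ".npy")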
def main(_):
    set_seed(FLAGS.seed, set_tf_seed=True)
    dataset_path = Path(FLAGS.dataset_path)

    if FLAGS.item == "keen":
        samples = keen.load_user_keen_interactions(
            dataset_path,
            min_user_ints=FLAGS.min_user_interactions,
            min_item_ints=FLAGS.min_item_interactions,
            max_item_ints=FLAGS.max_item_interactions)
        iid2name = keen.build_iid2title(item_id_key="keen_id",
                                        item_title_key="keen_title")
    elif FLAGS.item == "gem":
        samples = keen.load_keen_gems_interactions(
            dataset_path,
            min_keen_keen_edges=2,
            max_keen_keen_edges=1000,
            min_overlapping_users=2,
            min_keen_ints=FLAGS.min_user_interactions,
            min_item_ints=FLAGS.min_item_interactions,
            max_item_ints=FLAGS.max_item_interactions)
        iid2name = keen.build_iid2title(item_id_key="gem_id",
                                        item_title_key="gem_link_title")
    elif FLAGS.item == "ml-1m":
        samples = movielens.movielens_to_dict(dataset_path)
        iid2name = movielens.build_movieid2title(dataset_path)
    elif "amazon" in FLAGS.item:
        samples = amazon.load_interactions(dataset_path / FLAGS.amazon_reviews)
        iid2name = amazon.build_itemid2name(dataset_path / FLAGS.amazon_meta)
    else:
        raise ValueError(f"Unknown item: {FLAGS.item}")

    if FLAGS.filter_most_popular > 0:
        print(f"Filtering {FLAGS.filter_most_popular} most popular items")
        sorted_items = sort_items_by_popularity(samples)
        iid_to_filter = {iid for iid, _ in sorted_items[:FLAGS.filter_most_popular]}
        samples = {uid: list(set(ints) - iid_to_filter)
                   for uid, ints in samples.items()}
        samples = {uid: ints for uid, ints in samples.items() if ints}

    if FLAGS.plot_graph:
        plot_graph(samples)
        return

    uid2id, iid2id = map_raw_ids_to_sequential_ids(samples)
    id_samples = {}
    for uid, ints in samples.items():
        if FLAGS.item in ("keen", "gem"):
            ints = sorted(ints)
        id_samples[uid2id[uid]] = [iid2id[iid] for iid in ints]

    data = create_splits(id_samples,
                         Relations.USER_ITEM.value,
                         do_random=FLAGS.shuffle,
                         seed=FLAGS.seed)
    data["iid2name"] = {iid: iid2name.get(iid, "None") for iid in iid2id}
    data["id2uid"] = {v: k for k, v in uid2id.items()}
    data["id2iid"] = {v: k for k, v in iid2id.items()}
    print(f"User item interaction triplets: {len(data['train'])}")
    n_entities = len(uid2id) + len(iid2id)

    # if there is an item-item graph, we preprocess it into extra triplets
    if FLAGS.item_item_file:
        item_item_distances_dict = load_item_item_distances(
            dataset_path / FLAGS.item_item_file)
        item_item_triplets = build_item_item_triplets(
            item_item_distances_dict, iid2id, FLAGS.similarity_items_per_item)
        add_to_train_split(data, item_item_triplets)
        print(f"Added item-item similarity triplets: {len(item_item_triplets)}")

    if "amazon" in FLAGS.item and FLAGS.add_extra_relations:
        print("Adding extra relations")
        n_entities = amazon_relations.load_relations(
            dataset_path / FLAGS.amazon_meta, data, iid2id, n_entities)

    data["n_entities"] = n_entities
    print(f"Final training split: {len(data['train'])} triplets")

    # creates directories to save preprocessed data
    prep_path = Path(CONFIG["string"]["prep_dir"][1])
    prep_path.mkdir(parents=True, exist_ok=True)
    to_save_dir = prep_path / FLAGS.item
    to_save_dir.mkdir(parents=True, exist_ok=True)
    save_as_pickle(to_save_dir / f"{FLAGS.prep_id}.pickle", data)
    if FLAGS.export_splits:
        export_splits(data, to_save_dir, FLAGS.prep_id)
    print("Done!")
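# Hedged sketch, not from the original source: assuming `save_as_pickle` wraps
# the standard `pickle.dump`, the preprocessed splits written above can be
# read back as follows. `prep_dir`, `item`, and `prep_id` mirror the path
# components built in `main`.
import pickle

def load_prep(prep_dir, item, prep_id):
    with open(Path(prep_dir) / item / f"{prep_id}.pickle", "rb") as f:
        return pickle.load(f)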
def setUp(self):
    super().setUp()
    set_seed(42, set_tf_seed=True)
    self.dtype = tf.float64
    self.c = tf.convert_to_tensor([1.0], dtype=self.dtype)
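# Hypothetical companion test, illustrative only: it checks the curvature
# fixture itself rather than any manifold code under test.
def test_curvature_fixture(self):
    self.assertEqual(self.c.dtype, self.dtype)
    self.assertEqual(self.c.shape.as_list(), [1])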