def tfidf_covariance(texts, savepath):
    """Build a ``HierarchicalObservation`` from a TF-IDF clustering of ``texts``.

    Pipeline:

    1. Each document is joined into one space-separated string and
       vectorised with TF-IDF (at most 5000 features, ASCII accent
       stripping).
    2. The dense TF-IDF rows are clustered with average-linkage,
       Euclidean-distance agglomerative clustering.
    3. The linkage is converted to a tree; its adjacency and degree
       matrices form a Laplacian that is regularised with ``1 / sigma**2``
       on the diagonal and inverted.  The leading
       ``len(texts) x len(texts)`` block of the inverse is kept as the
       covariance.

    Both the linkage matrix and the covariance are cached as ``.npy`` files
    inside ``savepath``.  A cached file is reused whenever it exists; it is
    NOT invalidated when ``texts`` changes, so use a fresh directory for a
    different corpus.

    Args:
        texts: Sized collection of documents, each an iterable of string
            tokens (``len(texts)`` is used to slice the covariance).
        savepath: Cache directory; a trailing ``/`` is appended if missing
            and the directory is created on demand.

    Returns:
        ``HierarchicalObservation`` constructed from the covariance matrix.
    """
    if not savepath.endswith("/"):
        savepath = savepath + "/"
    linkage_file = savepath + "__linkage_average.npy"
    covariance_file = savepath + "__covariance__.npy"

    if os.path.exists(linkage_file):
        Z = np.load(linkage_file)
    else:
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(savepath, exist_ok=True)
        from sklearn.feature_extraction.text import TfidfVectorizer
        # ``input`` must be one of 'filename', 'file' or 'content'; the
        # documents are passed as in-memory strings, hence 'content'.
        vectorizer = TfidfVectorizer(input='content',
                                     strip_accents='ascii',
                                     analyzer='word',
                                     max_features=5000)
        y = vectorizer.fit_transform(" ".join(text) for text in texts)
        # toarray() yields a plain ndarray (todense() returns the
        # deprecated np.matrix type).
        Z = linkage(y.toarray(), method='average', metric='euclidean')
        np.save(linkage_file, Z)

    if os.path.exists(covariance_file):
        Cov = np.load(covariance_file)
    else:
        root, nodes = to_tree(Z, rd=True)
        assign_parents(root)
        adj_mat = get_adjacency_matrix(nodes)
        deg_mat = get_degree_matrix(nodes)
        sigma = 5
        # The 1/sigma**2 diagonal term presumably keeps the Laplacian
        # invertible -- TODO confirm the intended prior with the author.
        laplacian = (np.diag(deg_mat) - adj_mat
                     + 1 / (sigma**2) * np.eye(len(deg_mat)))
        # Keep only the leading block; presumably these rows correspond to
        # the leaf (document) nodes -- depends on get_adjacency_matrix.
        Cov = np.linalg.inv(laplacian)[:len(texts), :len(texts)]
        np.save(covariance_file, Cov)

    return HierarchicalObservation(Cov)
def load_data(data_type: str, num_features: int, num_rel_types: int,
              sparse: bool = True, graph_type: str = 'homogeneous',
              edges: bool = False, num_nodes: int = 1000,
              data_path: str = None, entities_path: str = None,
              relations_path: str = None) -> tuple:
    """Load a graph dataset selected by ``data_type``.

    Dispatch rules (checked in this order):

    * ``data_type == 'random'`` -- delegates to ``load_random`` with
      ``num_features``, ``num_rel_types``, ``sparse``, ``graph_type``,
      ``num_nodes`` and ``edges``; its result is returned unchanged.
    * ``'covid' in data_type`` (substring match) -- loads the DRKG entity
      embeddings and builds the adjacency from ``./data/drkg.tsv`` and
      ``drkg-relations.tsv``; returns ``(entity_emb, A)``.
    * ``data_type == 'custom'`` -- reads ``data_path``, ``entities_path``
      and ``relations_path`` via ``load_data_from_file``; returns
      ``(X, A, E)``.

    Args:
        data_type: Dataset selector, see above.
        num_features: Forwarded to ``load_random`` only.
        num_rel_types: Forwarded to ``load_random`` only.
        sparse: Forwarded to ``load_random`` only.
        graph_type: Forwarded to ``load_random`` only.
        edges: Forwarded to ``load_random`` only.
        num_nodes: Forwarded to ``load_random`` only.
        data_path: Used by the ``'custom'`` branch only.
        entities_path: Used by the ``'custom'`` branch only.
        relations_path: Used by the ``'custom'`` branch only.

    Returns:
        A tuple whose length depends on the branch taken (see above).

    Raises:
        ValueError: If ``data_type`` matches none of the supported kinds.
        SystemExit: With code ``-1`` when a ``'custom'`` file is missing.
    """
    print('[.] Loading data...')
    if data_type == 'random':
        return load_random(num_features, num_rel_types, sparse, graph_type,
                           num_nodes, edges)
    elif 'covid' in data_type:
        entity_emb, entity_map = load_covid_drkg()
        A = get_adjacency_matrix('./data/drkg.tsv', 'drkg-relations.tsv',
                                 entity_map)
        return entity_emb, A
    elif data_type == 'custom':
        try:
            X, A, E = load_data_from_file(data_path, entities_path,
                                          relations_path)
        except FileNotFoundError:
            print('[-] Incorrect file path...')
            # The ``exit`` builtin is injected by the ``site`` module for
            # interactive use and may be missing (``python -S``, frozen
            # builds); raising SystemExit directly has the same exit code
            # without that dependency.
            raise SystemExit(-1)
        return X, A, E
    else:
        raise ValueError("[-] data_type can only be one of 'random',"
                         "'covid', and 'custom'.")
def tfidf_covariance(texts, savepath):
    """Build a ``HierarchicalObservation`` from a TF-IDF clustering of ``texts``.

    Pipeline:

    1. Each document is joined into one space-separated string and
       vectorised with TF-IDF (at most 5000 features, ASCII accent
       stripping).
    2. The dense TF-IDF rows are clustered with average-linkage,
       Euclidean-distance agglomerative clustering.
    3. The linkage is converted to a tree; its adjacency and degree
       matrices form a Laplacian that is regularised with ``1 / sigma**2``
       on the diagonal and inverted.  The leading
       ``len(texts) x len(texts)`` block of the inverse is kept as the
       covariance.

    Both the linkage matrix and the covariance are cached as ``.npy`` files
    inside ``savepath``.  A cached file is reused whenever it exists; it is
    NOT invalidated when ``texts`` changes, so use a fresh directory for a
    different corpus.

    Args:
        texts: Sized collection of documents, each an iterable of string
            tokens (``len(texts)`` is used to slice the covariance).
        savepath: Cache directory; a trailing ``/`` is appended if missing
            and the directory is created on demand.

    Returns:
        ``HierarchicalObservation`` constructed from the covariance matrix.
    """
    if not savepath.endswith("/"):
        savepath = savepath + "/"
    linkage_file = savepath + "__linkage_average.npy"
    covariance_file = savepath + "__covariance__.npy"

    if os.path.exists(linkage_file):
        Z = np.load(linkage_file)
    else:
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(savepath, exist_ok=True)
        from sklearn.feature_extraction.text import TfidfVectorizer
        # ``input`` must be one of 'filename', 'file' or 'content'; the
        # documents are passed as in-memory strings, hence 'content'.
        vectorizer = TfidfVectorizer(input='content',
                                     strip_accents='ascii',
                                     analyzer='word',
                                     max_features=5000)
        y = vectorizer.fit_transform(" ".join(text) for text in texts)
        # toarray() yields a plain ndarray (todense() returns the
        # deprecated np.matrix type).
        Z = linkage(y.toarray(), method='average', metric='euclidean')
        np.save(linkage_file, Z)

    if os.path.exists(covariance_file):
        Cov = np.load(covariance_file)
    else:
        root, nodes = to_tree(Z, rd=True)
        assign_parents(root)
        adj_mat = get_adjacency_matrix(nodes)
        deg_mat = get_degree_matrix(nodes)
        sigma = 5
        # The 1/sigma**2 diagonal term presumably keeps the Laplacian
        # invertible -- TODO confirm the intended prior with the author.
        laplacian = (np.diag(deg_mat) - adj_mat
                     + 1 / (sigma**2) * np.eye(len(deg_mat)))
        # Keep only the leading block; presumably these rows correspond to
        # the leaf (document) nodes -- depends on get_adjacency_matrix.
        Cov = np.linalg.inv(laplacian)[:len(texts), :len(texts)]
        np.save(covariance_file, Cov)

    return HierarchicalObservation(Cov)
def objectlm_covariance(matrix, savepath, metric="cosine"):
    """Derive a ``HierarchicalObservation`` from a clustering of ``matrix``.

    ``matrix`` is clustered with average linkage under ``metric``; the
    resulting tree's adjacency and degree matrices form a Laplacian that is
    regularised with ``1 / sigma**2`` on the diagonal and inverted.  The
    leading ``matrix.shape[0]`` rows and columns of the inverse become the
    covariance.

    The linkage and the covariance are cached as ``.npy`` files under
    ``savepath`` and reloaded whenever the files already exist; the cache is
    not keyed on ``matrix`` or ``metric``.

    Args:
        matrix: Array passed to ``linkage``; ``matrix.shape[0]`` is taken as
            the number of observations (assumed one row per object).
        savepath: Cache directory; ``/`` is appended when missing.
        metric: Distance metric name forwarded to ``linkage``.

    Returns:
        ``HierarchicalObservation`` wrapping the covariance matrix.
    """
    cache_dir = savepath if savepath.endswith("/") else savepath + "/"
    linkage_path = cache_dir + "__linkage_average.npy"
    covariance_path = cache_dir + "__covariance__.npy"

    # Stage 1: hierarchical clustering (cached).
    if not os.path.exists(linkage_path):
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        merges = linkage(matrix, method='average', metric=metric)
        np.save(linkage_path, merges)
    else:
        merges = np.load(linkage_path)

    # Stage 2: covariance from the regularised tree Laplacian (cached).
    if not os.path.exists(covariance_path):
        tree_root, tree_nodes = to_tree(merges, rd=True)
        assign_parents(tree_root)
        adjacency = get_adjacency_matrix(tree_nodes)
        degrees = get_degree_matrix(tree_nodes)
        sigma = 5
        ridge = 1 / (sigma**2) * np.eye(len(degrees))
        regularised = np.diag(degrees) - adjacency + ridge
        n_obs = matrix.shape[0]
        covariance = np.linalg.inv(regularised)[:n_obs, :n_obs]
        np.save(covariance_path, covariance)
    else:
        covariance = np.load(covariance_path)

    return HierarchicalObservation(covariance)
def objectlm_covariance(matrix, savepath, metric="cosine"):
    """Derive a ``HierarchicalObservation`` from a clustering of ``matrix``.

    ``matrix`` is clustered with average linkage under ``metric``; the
    resulting tree's adjacency and degree matrices form a Laplacian that is
    regularised with ``1 / sigma**2`` on the diagonal and inverted.  The
    leading ``matrix.shape[0]`` rows and columns of the inverse become the
    covariance.

    The linkage and the covariance are cached as ``.npy`` files under
    ``savepath`` and reloaded whenever the files already exist; the cache is
    not keyed on ``matrix`` or ``metric``.

    Args:
        matrix: Array passed to ``linkage``; ``matrix.shape[0]`` is taken as
            the number of observations (assumed one row per object).
        savepath: Cache directory; ``/`` is appended when missing.
        metric: Distance metric name forwarded to ``linkage``.

    Returns:
        ``HierarchicalObservation`` wrapping the covariance matrix.
    """
    cache_dir = savepath if savepath.endswith("/") else savepath + "/"
    linkage_path = cache_dir + "__linkage_average.npy"
    covariance_path = cache_dir + "__covariance__.npy"

    # Stage 1: hierarchical clustering (cached).
    if not os.path.exists(linkage_path):
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        merges = linkage(matrix, method='average', metric=metric)
        np.save(linkage_path, merges)
    else:
        merges = np.load(linkage_path)

    # Stage 2: covariance from the regularised tree Laplacian (cached).
    if not os.path.exists(covariance_path):
        tree_root, tree_nodes = to_tree(merges, rd=True)
        assign_parents(tree_root)
        adjacency = get_adjacency_matrix(tree_nodes)
        degrees = get_degree_matrix(tree_nodes)
        sigma = 5
        ridge = 1 / (sigma**2) * np.eye(len(degrees))
        regularised = np.diag(degrees) - adjacency + ridge
        n_obs = matrix.shape[0]
        covariance = np.linalg.inv(regularised)[:n_obs, :n_obs]
        np.save(covariance_path, covariance)
    else:
        covariance = np.load(covariance_path)

    return HierarchicalObservation(covariance)
recording='data/processed/METR-LA' """ Dataset """ # read sensor IDs with open(sensor_ids) as f: sensor_ids = f.read().strip().split(',') # read sensor distance distance_df = pd.read_csv(sensor_distance, dtype={'from': 'str', 'to': 'str'}) # build adj matrix based on equation (10) adj_mx = utils.get_adjacency_matrix(distance_df, sensor_ids) data = utils.load_dataset(dataset_dir=recording, batch_size=batch_size, test_batch_size=batch_size) test_data_loader = data['test_loader'] standard_scaler = data['scaler'] num_test_iteration_per_epoch = math.ceil(data['x_test'].shape[0] / batch_size) """ Restore model from the checkpoint """ device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = DCRNNModel(adj_mx,