def tfidf_covariance(texts, savepath):
    """Build hierarchical observables from a TF-IDF clustering of ``texts``.

    The documents are vectorised with TF-IDF, clustered with average-linkage
    hierarchical clustering (Euclidean metric), and the resulting tree is
    turned into a covariance matrix by inverting a regularised graph
    Laplacian. Both intermediate results are cached as ``.npy`` files under
    ``savepath`` and reused on later calls.

    NOTE: the cache is keyed only by ``savepath``. If cached files exist they
    are loaded regardless of the ``texts`` passed in, so use a fresh directory
    when the corpus changes.

    Args:
        texts: Sized collection of documents, each an iterable of string
            tokens (tokens are joined with single spaces before vectorising).
        savepath: Directory used for the cache files
            ``__linkage_average.npy`` and ``__covariance__.npy``. A trailing
            ``/`` is appended when missing; the directory is created if
            needed.

    Returns:
        A ``HierarchicalObservation`` built from the covariance matrix.
    """
    if not savepath.endswith("/"):
        savepath = savepath + "/"
    linkage_file = savepath + "__linkage_average.npy"
    covariance_file = savepath + "__covariance__.npy"

    if os.path.exists(linkage_file):
        Z = np.load(linkage_file)
    else:
        # exist_ok avoids the race between checking for and creating the dir.
        os.makedirs(savepath, exist_ok=True)
        from sklearn.feature_extraction.text import TfidfVectorizer
        # `input` must be one of 'filename', 'file' or 'content'; the original
        # passed the `str` type, which validated scikit-learn releases reject.
        vectorizer = TfidfVectorizer(input='content',
                                     strip_accents='ascii',
                                     analyzer='word',
                                     max_features=5000)
        y = vectorizer.fit_transform(" ".join(text) for text in texts)
        # toarray() yields a plain ndarray instead of the deprecated np.matrix.
        Z = linkage(y.toarray(), method='average', metric='euclidean')
        np.save(linkage_file, Z)

    if os.path.exists(covariance_file):
        Cov = np.load(covariance_file)
    else:
        root, nodes = to_tree(Z, rd=True)
        assign_parents(root)
        adj_mat = get_adjacency_matrix(nodes)
        deg_mat = get_degree_matrix(nodes)
        sigma = 5
        # The 1/sigma^2 ridge on the diagonal makes the Laplacian invertible;
        # the float literal keeps it non-zero even under integer division.
        laplacian = (np.diag(deg_mat) - adj_mat
                     + 1.0 / (sigma ** 2) * np.eye(len(deg_mat)))
        # Keep only the leading len(texts) x len(texts) block.
        # NOTE(review): presumably these rows/columns are the leaf nodes
        # (the original documents) -- confirm against get_adjacency_matrix.
        n_docs = len(texts)
        Cov = np.linalg.inv(laplacian)[:n_docs, :n_docs]
        np.save(covariance_file, Cov)
    return HierarchicalObservation(Cov)
Exemple #2
0
def load_data(data_type: str,
              num_features: int,
              num_rel_types: int,
              sparse: bool = True,
              graph_type: str = 'homogeneous',
              edges: bool = False,
              num_nodes: int = 1000,
              data_path: str = None,
              entities_path: str = None,
              relations_path: str = None) -> tuple:
    """Load a graph dataset selected by ``data_type``.

    Args:
        data_type: ``'random'``, ``'custom'``, or any string containing
            ``'covid'`` (substring match, checked after ``'random'``).
        num_features: Forwarded to ``load_random`` (``'random'`` only).
        num_rel_types: Forwarded to ``load_random`` (``'random'`` only).
        sparse: Forwarded to ``load_random`` (``'random'`` only).
        graph_type: Forwarded to ``load_random`` (``'random'`` only).
        edges: Forwarded to ``load_random`` (``'random'`` only).
        num_nodes: Forwarded to ``load_random`` (``'random'`` only).
        data_path: Forwarded to ``load_data_from_file`` (``'custom'`` only).
        entities_path: Forwarded to ``load_data_from_file`` (``'custom'``
            only).
        relations_path: Forwarded to ``load_data_from_file`` (``'custom'``
            only).

    Returns:
        * ``'random'``: whatever ``load_random`` returns.
        * ``'covid'``: ``(entity_emb, A)``.
        * ``'custom'``: ``(X, A, E)``.

    Raises:
        ValueError: If ``data_type`` matches none of the supported kinds.
        SystemExit: With status ``-1`` if a ``'custom'`` input file is
            missing.
    """
    print('[.] Loading data...')
    if data_type == 'random':
        return load_random(num_features, num_rel_types, sparse, graph_type,
                           num_nodes, edges)

    if 'covid' in data_type:
        entity_emb, entity_map = load_covid_drkg()
        A = get_adjacency_matrix('./data/drkg.tsv', 'drkg-relations.tsv',
                                 entity_map)
        return entity_emb, A

    if data_type == 'custom':
        try:
            X, A, E = load_data_from_file(data_path, entities_path,
                                          relations_path)
        except FileNotFoundError:
            print('[-] Incorrect file path...')
            # sys.exit is the supported way to terminate; the bare exit()
            # helper is injected by `site` for interactive use and may be
            # absent (e.g. `python -S`, frozen executables).
            import sys
            sys.exit(-1)
        return X, A, E

    raise ValueError('[-] data_type can only be one of \'random\','
                     '\'covid\', and \'custom\'.')
def tfidf_covariance(texts, savepath):
    """Compute (or load from cache) the TF-IDF based hierarchical covariance.

    Pipeline: TF-IDF vectorisation of ``texts`` -> average-linkage
    hierarchical clustering with the Euclidean metric -> tree -> inverse of
    the regularised graph Laplacian, truncated to the first ``len(texts)``
    rows and columns. The linkage matrix and the covariance are each cached
    as ``.npy`` files inside ``savepath``.

    NOTE: cached files are reused whenever they exist, independent of the
    ``texts`` argument; point ``savepath`` at a new directory for a new
    corpus.

    Args:
        texts: Sized collection of documents; each document is an iterable
            of string tokens that is joined with spaces before vectorising.
        savepath: Cache directory (a trailing ``/`` is added if absent and
            the directory is created when missing).

    Returns:
        ``HierarchicalObservation`` wrapping the covariance matrix.
    """
    if not savepath.endswith("/"):
        savepath = savepath + "/"
    linkage_file = savepath + "__linkage_average.npy"
    covariance_file = savepath + "__covariance__.npy"

    if os.path.exists(linkage_file):
        Z = np.load(linkage_file)
    else:
        # Create the cache directory without a check-then-create race.
        os.makedirs(savepath, exist_ok=True)
        from sklearn.feature_extraction.text import TfidfVectorizer
        # The documented values for `input` are 'filename', 'file' and
        # 'content'; passing the `str` type (as before) fails parameter
        # validation in current scikit-learn.
        vectorizer = TfidfVectorizer(input='content',
                                     strip_accents='ascii',
                                     analyzer='word',
                                     max_features=5000)
        y = vectorizer.fit_transform(" ".join(text) for text in texts)
        # Densify to an ndarray (todense() would give a legacy np.matrix).
        Z = linkage(y.toarray(), method='average', metric='euclidean')
        np.save(linkage_file, Z)

    if os.path.exists(covariance_file):
        Cov = np.load(covariance_file)
    else:
        root, nodes = to_tree(Z, rd=True)
        assign_parents(root)
        adj_mat = get_adjacency_matrix(nodes)
        deg_mat = get_degree_matrix(nodes)
        sigma = 5
        # Diagonal ridge of 1/sigma^2 keeps the matrix invertible; written
        # with a float literal so it can never truncate to zero.
        laplacian = np.diag(deg_mat) - adj_mat + 1.0 / (sigma**2) * np.eye(
            len(deg_mat))
        # NOTE(review): the leading block is assumed to correspond to the
        # input documents (tree leaves) -- verify with the matrix helpers.
        n_docs = len(texts)
        Cov = np.linalg.inv(laplacian)[:n_docs, :n_docs]
        np.save(covariance_file, Cov)
    return HierarchicalObservation(Cov)
def objectlm_covariance(matrix, savepath, metric="cosine"):
    """Build hierarchical observables from the rows of ``matrix``.

    Rows are clustered with average linkage under ``metric``; the tree is
    converted into a covariance matrix by inverting a regularised graph
    Laplacian and keeping the leading ``matrix.shape[0]`` square block.
    Linkage and covariance are cached as ``.npy`` files in ``savepath`` and
    loaded from there when present.

    Args:
        matrix: 2-D array of observations (one row per item).
        savepath: Cache directory; ``/`` is appended when missing.
        metric: Distance metric passed to ``linkage``.

    Returns:
        ``HierarchicalObservation`` constructed from the covariance matrix.
    """
    cache_dir = savepath if savepath.endswith("/") else savepath + "/"
    linkage_file = cache_dir + "__linkage_average.npy"
    covariance_file = cache_dir + "__covariance__.npy"

    # Step 1: linkage matrix, from cache when available.
    if not os.path.exists(linkage_file):
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        Z = linkage(matrix, method='average', metric=metric)
        np.save(linkage_file, Z)
    else:
        Z = np.load(linkage_file)

    # Step 2: covariance, from cache when available.
    if os.path.exists(covariance_file):
        return HierarchicalObservation(np.load(covariance_file))

    root, nodes = to_tree(Z, rd=True)
    assign_parents(root)
    adjacency = get_adjacency_matrix(nodes)
    degrees = get_degree_matrix(nodes)
    sigma = 5
    ridge = 1 / (sigma**2) * np.eye(len(degrees))
    inverse = np.linalg.inv(np.diag(degrees) - adjacency + ridge)
    n_rows = matrix.shape[0]
    Cov = inverse[:n_rows, :n_rows]
    np.save(covariance_file, Cov)
    return HierarchicalObservation(Cov)
def objectlm_covariance(matrix, savepath, metric="cosine"):
    """Return hierarchical observables for ``matrix``, caching intermediates.

    The function clusters the rows of ``matrix`` (average linkage, distance
    given by ``metric``), derives a covariance matrix from the cluster tree
    via the inverse of a regularised Laplacian, and wraps it in a
    ``HierarchicalObservation``. The linkage matrix and the covariance are
    stored in ``savepath`` as ``__linkage_average.npy`` and
    ``__covariance__.npy``; existing files are loaded instead of recomputed.

    Args:
        matrix: 2-D array whose rows are the items to cluster.
        savepath: Directory for the cache files (trailing ``/`` optional).
        metric: Distance metric forwarded to ``linkage``.

    Returns:
        The ``HierarchicalObservation`` built from the covariance matrix.
    """
    if savepath.endswith("/"):
        base = savepath
    else:
        base = savepath + "/"
    z_path = base + "__linkage_average.npy"
    cov_path = base + "__covariance__.npy"

    have_linkage = os.path.exists(z_path)
    if have_linkage:
        Z = np.load(z_path)
    else:
        if not os.path.exists(base):
            os.makedirs(base)
        Z = linkage(matrix, method='average', metric=metric)
        np.save(z_path, Z)

    if not os.path.exists(cov_path):
        root, nodes = to_tree(Z, rd=True)
        assign_parents(root)
        adj_mat = get_adjacency_matrix(nodes)
        deg_mat = get_degree_matrix(nodes)
        sigma = 5
        dim = len(deg_mat)
        # Regularised Laplacian: D - A + I / sigma^2.
        laplacian = np.diag(deg_mat) - adj_mat + 1 / (sigma**2) * np.eye(dim)
        full_cov = np.linalg.inv(laplacian)
        keep = matrix.shape[0]
        Cov = full_cov[:keep, :keep]
        np.save(cov_path, Cov)
    else:
        Cov = np.load(cov_path)

    observables = HierarchicalObservation(Cov)
    return observables
Exemple #6
0
# Directory of the preprocessed dataset; passed to utils.load_dataset below.
recording='data/processed/METR-LA'


"""
Dataset

"""
# Read the sensor IDs from a file containing one comma-separated list.
# NOTE(review): `sensor_ids` starts out as the file path (defined outside this
# excerpt) and is rebound here to the parsed list of ID strings.
with open(sensor_ids) as f:
    sensor_ids = f.read().strip().split(',')

# Read the sensor-distance table; the 'from'/'to' columns are forced to str.
# NOTE(review): presumably so they compare equal to the string IDs read above
# -- confirm against utils.get_adjacency_matrix.
distance_df = pd.read_csv(sensor_distance, dtype={'from': 'str', 'to': 'str'})

# Build the adjacency matrix from the distance table and the sensor IDs
# (per the original note: based on equation (10) of the referenced paper).
adj_mx = utils.get_adjacency_matrix(distance_df, sensor_ids)

# Load the dataset; the same batch size is used for training and testing.
data = utils.load_dataset(dataset_dir=recording, batch_size=batch_size, test_batch_size=batch_size)
test_data_loader = data['test_loader']
standard_scaler = data['scaler']

# Number of test batches per epoch, rounding up to include a partial batch.
num_test_iteration_per_epoch = math.ceil(data['x_test'].shape[0] / batch_size)


"""
Restore model from the checkpoint

"""
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = DCRNNModel(adj_mx,