Ejemplo n.º 1
0
def main(outfile=''):
    """Evaluate every (manifold, dimension) embedding and dump the results as JSON.

    For each manifold name and each dimension, the word2vec-format text file
    ``./emb150txt/<manifold><dim>/<manifold><dim>.txt`` is loaded, passed to
    ``ReconstructionEvaluation`` together with freshly generated data, and the
    value returned by ``evaluate()`` is stored under the key
    ``'<manifold><dim>'``. The collected mapping is written to `outfile`.

    :param outfile: path of the JSON file to write. The default ``''`` is not
        a usable path for ``open``, so callers are expected to pass a real one.
    """
    manifolds = ['euclidean',
                 # 'transe',
                 'poincare',
                 'lorentz']
    dimensions = [5, 10, 20, 50, 100, 200]
    results = {}

    for mani in manifolds:
        for dim in dimensions:
            # The same '<manifold><dim>' tag names the result key, the
            # directory and the file.
            tag = '%s%d' % (mani, dim)
            txtfile = './emb150txt/%s/%s.txt' % (tag, tag)
            print('loading %s...' % txtfile)
            keyvalues = PoincareKeyedVectors.load_word2vec_format(
                txtfile,
                fvocab=None,
                binary=False,
                encoding='utf8',
                unicode_errors='strict',
                limit=None,
                datatype=np.float64,
            )

            # Kept inside the loop: whether generate_data() is deterministic
            # (and therefore hoistable) is not visible from here.
            pr = generate_data()
            results[tag] = ReconstructionEvaluation(pr, keyvalues).evaluate()

    with open(outfile, 'w') as fp:
        json.dump(results, fp)
Ejemplo n.º 2
0
    def __init__(self, ns):
        """Build the ontology graph, level index, label encoder and embeddings.

        :param ns: ontology namespace, one of
            ['molecular_function', 'biological_process', 'cellular_component']
        """
        aspect = GoAspect(ns)
        self._aspect = aspect
        G = get_ontology_graph(ns)
        self._graph = G

        # Reverse topological order; its first entry is taken as the root.
        classes = list(nx.topological_sort(G))
        classes.reverse()

        root = classes[0]
        self.root = root
        self._levels = self.bfs(root)

        # NOTE(review): the BFS result stored just above is replaced right
        # here by a mapping of shortest-path length to root -> list of nodes.
        levels = {}
        self._levels = levels
        for node, lvl in nx.shortest_path_length(G, target=root).items():
            levels.setdefault(lvl, []).append(node)

        self._mlb = MultiLabelBinarizer().fit([classes])

        # Two-way lookup between GO class and its position in `classes`.
        self.go2ix = {go: ix for ix, go in enumerate(classes)}
        self.ix2go = dict(enumerate(classes))

        # `dim` and `num_epochs` are not parameters of this method;
        # presumably module-level settings -- confirm against the full file.
        emb_pattern = '%s/%s-poincare-dim%d-epochs%d.emb'
        emb_fname = os.path.join(emb_pattern % (DATA_ROOT, aspect, dim, num_epochs))

        # Reuse a saved embedding when the file exists, otherwise call
        # embedding() with the target file name.
        if not os.path.exists(emb_fname):
            self._kv = embedding(ns, emb_fname)
        else:
            self._kv = PoincareKeyedVectors.load(emb_fname)
Ejemplo n.º 3
0
    def __init__(self, params):
        """Load the vector models and text-cleaning patterns named in `params`.

        :param params: mapping read for the keys ``'wiki_path'``,
            ``'wiki_vectors_path'``, ``'node2vec_path'``, ``'n'``,
            ``'projection_path'``, ``'poincare_path'`` and ``'language'``;
            it is also forwarded unchanged to the parent constructor.
        """
        super().__init__(params)
        self.wiktionary = self.__get_wiktionary(params['wiki_path'])
        self.wiki_model = KeyedVectors.load_word2vec_format(params['wiki_vectors_path'], binary=False)
        self.node2vec = KeyedVectors.load_word2vec_format(params["node2vec_path"], binary=False)
        self.n = params['n']
        # self.w2v_data is not set in this method; presumably provided by the
        # parent constructor -- confirm.
        self.projection = ProjectionVectorizer(self.w2v_data, params["projection_path"])
        self.poincare_model = PoincareKeyedVectors.load_word2vec_format(params["poincare_path"], binary=False)

        # Raw strings keep the pattern text identical while avoiding the
        # invalid "\-" escape sequence of a normal string literal.
        self.delete_bracets = re.compile(r"\(.+?\)")
        # NOTE(review): the ranges below are kept as-is to preserve behavior,
        # but `A-z` also spans the ASCII characters [ \ ] ^ _ ` and `А-я`
        # does not include the letters Ё/ё -- confirm whether that is intended.
        if params['language'] == 'ru':
            self.pattern = re.compile(r"[^А-я \-]")
        else:
            self.pattern = re.compile(r"[^A-z \-]")
Ejemplo n.º 4
0
def load_poincare_model(path, word2vec_format=True, binary=False):
    """ Load pre-trained Poincare embeddings from disk.

    When `word2vec_format` is true the file is read with
    ``PoincareKeyedVectors.load_word2vec_format``; otherwise a saved
    ``PoincareModel`` is loaded and its ``kv`` attribute is returned.

    :param path: path of the file of the pre-trained Poincare embedding model
    :param word2vec_format: whether to load from word2vec format (default: True)
    :param binary: binary format, only used for word2vec format (default: False)
    :return: a pre-trained Poincare embedding model
    :type path: str
    :type word2vec_format: bool
    :type binary: bool
    :rtype: gensim.models.poincare.PoincareKeyedVectors
    """
    if not word2vec_format:
        # Native format: the keyed vectors live on the loaded model.
        return PoincareModel.load(path).kv
    return PoincareKeyedVectors.load_word2vec_format(path, binary=binary)
def main(poincare=''):
    """Wrap word2vec-format Poincare vectors in a model, save, reload and train it.

    An empty ``PoincareModel`` is created, its ``kv`` is replaced by the
    vectors loaded from `poincare`, and the model is pickled to
    ``w2v_poincare.pickle``. The pickle is then loaded again, trained, and
    saved to ``w2v_poincare_after_train.pickle``.

    :param poincare: path of the word2vec-format text file to load.
    """
    from gensim.models.poincare import PoincareModel

    model = PoincareModel([], size=300, dtype=np.float64)

    load_options = dict(
        fvocab=None,
        binary=False,
        encoding='utf8',
        unicode_errors='strict',
        limit=None,
        datatype=np.float64,
    )
    vectors = PoincareKeyedVectors.load_word2vec_format(poincare, **load_options)
    model.kv = vectors

    # Round-trip through a pickle before training.
    model.save('w2v_poincare.pickle', pickle_protocol=4)
    reloaded = PoincareModel.load('w2v_poincare.pickle')

    reloaded.train(10000, batch_size=10, print_every=1, check_gradients_every=None)
    reloaded.save('w2v_poincare_after_train.pickle', pickle_protocol=4)
Ejemplo n.º 6
0
def poincare_distance_heatmap(origin_point, x_range=(-1.0, 1.0), y_range=(-1.0, 1.0), num_points=100):
    """Create a heatmap of Poincare distances from `origin_point` for each point (x, y),
    where x and y lie in `x_range` and `y_range` respectively, with `num_points` points chosen uniformly in both ranges.

    Parameters
    ----------
    origin_point : tuple (float, float)
        (x, y) from which distances are to be measured and plotted.
    x_range : tuple (float, float)
        (lower, upper) range for x-axis from which to choose `num_points` points.
    y_range : tuple (float, float)
        (lower, upper) range for y-axis from which to choose `num_points` points.
    num_points : int
        Number of points to choose from each of `x_range` and `y_range`.

    Notes
    -----
    Points outside the unit circle are ignored, since the Poincare distance is defined
    only for points inside the circle boundaries (exclusive of the boundary).

    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Plotly figure that contains plot

    """
    epsilon = 1e-8  # Can't choose (-1.0, -1.0) or (1.0, 1.0), distance undefined
    x_range, y_range = list(x_range), list(y_range)
    if x_range[0] == -1.0 and y_range[0] == -1.0:
        x_range[0] += epsilon
        y_range[0] += epsilon
    # The upper corner lives at index 1 of each range (index 0 is the lower bound).
    if x_range[1] == 1.0 and y_range[1] == 1.0:
        x_range[1] -= epsilon
        y_range[1] -= epsilon

    x_axis_values = np.linspace(x_range[0], x_range[1], num=num_points)
    # The y axis must be sampled from `y_range`, not from `x_range`.
    y_axis_values = np.linspace(y_range[0], y_range[1], num=num_points)
    x, y = np.meshgrid(x_axis_values, y_axis_values)
    # One row per grid point: column 0 is x, column 1 is y.
    all_points = np.column_stack((x.ravel(), y.ravel()))
    # Keep only points strictly inside the unit circle.
    norms = np.linalg.norm(all_points, axis=1)
    all_points = all_points[norms < 1]

    origin_point = np.array(origin_point)
    all_distances = PoincareKeyedVectors.poincare_dists(origin_point, all_points)

    distances = go.Scatter(
        x=all_points[:, 0],
        y=all_points[:, 1],
        mode='markers',
        marker=dict(
            size='9',
            color=all_distances,
            colorscale='Viridis',
            showscale=True,
            # Plain dict instead of the deprecated `go.ColorBar` helper class.
            colorbar=dict(
                title='Poincare Distance'
            ),
        ),
        text=[
            'Distance from (%.2f, %.2f): %.2f' % (origin_point[0], origin_point[1], d)
            for d in all_distances],
        name='',  # To avoid the default 'trace 0'
    )

    origin = go.Scatter(
        x=[origin_point[0]],
        y=[origin_point[1]],
        name='Distance from (%.2f, %.2f)' % (origin_point[0], origin_point[1]),
        mode='markers+text',
        marker=dict(
            size='10',
            color='rgb(200, 50, 50)'
        )
    )

    layout = go.Layout(
        width=900,
        height=800,
        showlegend=False,
        title='Poincare Distances from (%.2f, %.2f)' % (origin_point[0], origin_point[1]),
        hovermode='closest',
    )

    return go.Figure(data=[distances, origin], layout=layout)
Ejemplo n.º 7
0
 def setUp(self):
     """Load the binary word2vec-format file 'poincare_vectors.bin' into ``self.vectors``."""
     vectors_path = datapath('poincare_vectors.bin')
     self.vectors = PoincareKeyedVectors.load_word2vec_format(vectors_path, binary=True)
Ejemplo n.º 8
0
 def setUp(self):
     """Populate ``self.vectors`` from the binary 'poincare_vectors.bin' data file."""
     load_vectors = PoincareKeyedVectors.load_word2vec_format
     self.vectors = load_vectors(datapath('poincare_vectors.bin'), binary=True)
Ejemplo n.º 9
0
def poincare_distance_heatmap(origin_point, x_range=(-1.0, 1.0), y_range=(-1.0, 1.0), num_points=100):
    """Create a heatmap of Poincare distances from `origin_point` for each point (x, y),
    where x and y lie in `x_range` and `y_range` respectively, with `num_points` points chosen uniformly in both ranges.

    Parameters
    ----------
    origin_point : tuple (float, float)
        (x, y) from which distances are to be measured and plotted.
    x_range : tuple (float, float)
        (lower, upper) range for x-axis from which to choose `num_points` points.
    y_range : tuple (float, float)
        (lower, upper) range for y-axis from which to choose `num_points` points.
    num_points : int
        Number of points to choose from each of `x_range` and `y_range`.

    Notes
    -----
    Points outside the unit circle are ignored, since the Poincare distance is defined
    only for points inside the circle boundaries (exclusive of the boundary).

    Returns
    -------
    :class:`plotly.graph_objs.Figure`
        Plotly figure that contains plot

    """
    epsilon = 1e-8  # Can't choose (-1.0, -1.0) or (1.0, 1.0), distance undefined
    x_range, y_range = list(x_range), list(y_range)
    if x_range[0] == -1.0 and y_range[0] == -1.0:
        x_range[0] += epsilon
        y_range[0] += epsilon
    # The upper corner lives at index 1 of each range (index 0 is the lower bound).
    if x_range[1] == 1.0 and y_range[1] == 1.0:
        x_range[1] -= epsilon
        y_range[1] -= epsilon

    x_axis_values = np.linspace(x_range[0], x_range[1], num=num_points)
    # The y axis must be sampled from `y_range`, not from `x_range`.
    y_axis_values = np.linspace(y_range[0], y_range[1], num=num_points)
    x, y = np.meshgrid(x_axis_values, y_axis_values)
    # One row per grid point: column 0 is x, column 1 is y.
    all_points = np.column_stack((x.ravel(), y.ravel()))
    # Keep only points strictly inside the unit circle.
    norms = np.linalg.norm(all_points, axis=1)
    all_points = all_points[norms < 1]

    origin_point = np.array(origin_point)
    all_distances = PoincareKeyedVectors.poincare_dists(origin_point, all_points)

    distances = go.Scatter(
        x=all_points[:, 0],
        y=all_points[:, 1],
        mode='markers',
        marker=dict(
            size='9',
            color=all_distances,
            colorscale='Viridis',
            showscale=True,
            # Plain dict instead of the deprecated `go.ColorBar` helper class.
            colorbar=dict(
                title='Poincare Distance'
            ),
        ),
        text=[
            'Distance from (%.2f, %.2f): %.2f' % (origin_point[0], origin_point[1], d)
            for d in all_distances],
        name='',  # To avoid the default 'trace 0'
    )

    origin = go.Scatter(
        x=[origin_point[0]],
        y=[origin_point[1]],
        name='Distance from (%.2f, %.2f)' % (origin_point[0], origin_point[1]),
        mode='markers+text',
        marker=dict(
            size='10',
            color='rgb(200, 50, 50)'
        )
    )

    layout = go.Layout(
        width=900,
        height=800,
        showlegend=False,
        title='Poincare Distances from (%.2f, %.2f)' % (origin_point[0], origin_point[1]),
        hovermode='closest',
    )

    return go.Figure(data=[distances, origin], layout=layout)
Ejemplo n.º 10
0
 def __init__(self, params):
     """Initialise the parent with `params`, then load the Poincare vectors and read ``n``.

     :param params: mapping read for the keys ``"poincare_path"`` (text
         word2vec-format file) and ``"n"``.
     """
     super().__init__(params)
     poincare_path = params["poincare_path"]
     self.poincare_model = PoincareKeyedVectors.load_word2vec_format(poincare_path, binary=False)
     self.n = params["n"]
Ejemplo n.º 11
0
from gensim.models.poincare import PoincareModel, PoincareKeyedVectors, PoincareRelations
from gensim.viz.poincare import poincare_distance_heatmap

from tensorflow.keras.layers import Embedding

# Demo script: train a small Poincare model on a relations file and build a
# distance heatmap.

# Hard-coded absolute path to a tab-separated relations file; it must be
# adjusted before running on another machine.
wordnet_mamal_file_path = '/Users/pankaj/dev/git/smu/nlp337/data/mamals.tsv'
relations = PoincareRelations(wordnet_mamal_file_path, delimiter='\t')
# size=2 requests two-component vectors; burn_in=0 is passed explicitly.
model = PoincareModel(train_data=relations, size=2, burn_in=0)
model.train(epochs=2, print_every=500)

# NOTE(review): `pcv` is created separately from `model` and is not used in
# the visible part of this script -- confirm whether it is needed.
pcv = PoincareKeyedVectors(vector_size=20)

# Heatmap of distances from the origin over the square [-1, 1] x [-1, 1].
# The returned figure is neither stored nor displayed here.
poincare_distance_heatmap((0, 0),
                          x_range=(-1.0, 1.0),
                          y_range=(-1.0, 1.0),
                          num_points=100)