Example #1
# Imports needed to run this snippet standalone (get_data_home is
# defined in the same torchkge module as this loader):
import shutil
import zipfile
from os import makedirs, remove
from os.path import exists
from urllib.request import urlretrieve

from pandas import concat, read_csv

from torchkge.data import KnowledgeGraph


def load_fb15k(data_home=None):
    """Load fb15k dataset. See `here
    <https://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data>`__
    for paper by Bordes et al. originally presenting the dataset.

    Parameters
    ----------
    data_home: str, optional
        Path to the `torchkge_data` directory (containing data folders). If files are not present on disk in this
        directory, they are downloaded and then placed in the right place.

    Returns
    -------
    kg_train: `torchkge.data.KnowledgeGraph`
    kg_val: `torchkge.data.KnowledgeGraph`
    kg_test: `torchkge.data.KnowledgeGraph`

    """
    if data_home is None:
        data_home = get_data_home()
    data_path = data_home + '/FB15k'
    if not exists(data_path):
        makedirs(data_path, exist_ok=True)
        urlretrieve("https://graphs.telecom-paristech.fr/datasets/FB15k.zip",
                    data_home + '/FB15k.zip')
        with zipfile.ZipFile(data_home + '/FB15k.zip', 'r') as zip_ref:
            zip_ref.extractall(data_home)
        remove(data_home + '/FB15k.zip')
        shutil.rmtree(data_home + '/__MACOSX')

    df1 = read_csv(data_path + '/freebase_mtr100_mte100-train.txt',
                   sep='\t',
                   header=None,
                   names=['from', 'rel', 'to'])
    df2 = read_csv(data_path + '/freebase_mtr100_mte100-valid.txt',
                   sep='\t',
                   header=None,
                   names=['from', 'rel', 'to'])
    df3 = read_csv(data_path + '/freebase_mtr100_mte100-test.txt',
                   sep='\t',
                   header=None,
                   names=['from', 'rel', 'to'])
    df = concat([df1, df2, df3])
    kg = KnowledgeGraph(df)

    return kg.split_kg(sizes=(len(df1), len(df2), len(df3)))
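A minimal usage sketch, not part of the original snippet: the first call downloads FB15k, and the returned splits match the sizes of the original train/valid/test files (n_facts is the triple-count attribute on torchkge's KnowledgeGraph).

kg_train, kg_val, kg_test = load_fb15k()
print(kg_train.n_facts, kg_val.n_facts, kg_test.n_facts)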
Example #2
def load_fb15k237(data_home=None):
    """Load fb15k237 dataset. See `here
    <https://www.aclweb.org/anthology/D15-1174/>`__ for paper by Toutanova et al. originally presenting the dataset.

    Parameters
    ----------
    data_home: str, optional
        Path to the `torchkge_data` directory (containing data folders). If files are not present on disk in this
        directory, they are downloaded and then placed in the right place.

    Returns
    -------
    kg_train: `torchkge.data.KnowledgeGraph`
    kg_val: `torchkge.data.KnowledgeGraph`
    kg_test: `torchkge.data.KnowledgeGraph`

    """
    if data_home is None:
        data_home = get_data_home()
    data_path = data_home + '/FB15k237'
    if not exists(data_path):
        makedirs(data_path, exist_ok=True)
        urlretrieve(
            "https://graphs.telecom-paristech.fr/datasets/FB15k237.zip",
            data_home + '/FB15k237.zip')
        with zipfile.ZipFile(data_home + '/FB15k237.zip', 'r') as zip_ref:
            zip_ref.extractall(data_home)
        remove(data_home + '/FB15k237.zip')
        shutil.rmtree(data_home + '/__MACOSX')

    df1 = read_csv(data_path + '/train.txt',
                   sep='\t',
                   header=None,
                   names=['from', 'rel', 'to'])
    df2 = read_csv(data_path + '/valid.txt',
                   sep='\t',
                   header=None,
                   names=['from', 'rel', 'to'])
    df3 = read_csv(data_path + '/test.txt',
                   sep='\t',
                   header=None,
                   names=['from', 'rel', 'to'])
    df = concat([df1, df2, df3])
    kg = KnowledgeGraph(df)

    return kg.split_kg(sizes=(len(df1), len(df2), len(df3)))
Example #3
def load_wn18(data_home=None):
    """Load wn18 dataset.

    Parameters
    ----------
    data_home: str, optional
        Path to the `torchkge_data` directory (containing data folders). If files are not present on disk in this
        directory, they are downloaded and then placed in the right place.

    Returns
    -------
    kg_train: `torchkge.data.KnowledgeGraph`
    kg_val: `torchkge.data.KnowledgeGraph`
    kg_test: `torchkge.data.KnowledgeGraph`

    """
    if data_home is None:
        data_home = get_data_home()
    data_path = data_home + '/WN18'
    if not exists(data_path):
        makedirs(data_path, exist_ok=True)
        urlretrieve("https://graphs.telecom-paristech.fr/datasets/WN18.zip",
                    data_home + '/WN18.zip')
        with zipfile.ZipFile(data_home + '/WN18.zip', 'r') as zip_ref:
            zip_ref.extractall(data_home)
        remove(data_home + '/WN18.zip')
        shutil.rmtree(data_home + '/__MACOSX')

    df1 = read_csv(data_path + '/wordnet-mlj12-train.txt',
                   sep='\t',
                   header=None,
                   names=['from', 'rel', 'to'])
    df2 = read_csv(data_path + '/wordnet-mlj12-valid.txt',
                   sep='\t',
                   header=None,
                   names=['from', 'rel', 'to'])
    df3 = read_csv(data_path + '/wordnet-mlj12-test.txt',
                   sep='\t',
                   header=None,
                   names=['from', 'rel', 'to'])
    df = concat([df1, df2, df3])
    kg = KnowledgeGraph(df)

    return kg.split_kg(sizes=(len(df1), len(df2), len(df3)))
Example #4
class Exclusive(BaseExtension):
    def __init__(self):
        BaseExtension.__init__(self)
        self.utils = Utils()
        self.kg = KnowledgeGraph()

    def excute(self, form_dict):  # (sic) hook name kept as defined by the extension interface
        rID = form_dict['rID'].strip()
        title = form_dict['rTitle'].strip()
        url = form_dict['url'].strip()
        fileName = form_dict['originFileName']
        print(fileName)
        # Look the record up by its id in the originating file.
        r = self.utils.getRecord(rID, path=fileName)

        if r is not None and r.get_id().strip() != '':
            # Derive the db folder and record key from the file path, then
            # redirect to the local viewer filtered on this record.
            db = fileName[fileName.find('db/') + 3:fileName.rfind('/')] + '/'
            key = fileName[fileName.rfind('/') + 1:]
            print(db + ' ' + key)
            return ('http://' + Config.ip_adress + '/?db=' + db + '&key=' +
                    key + '&filter=' + rID + '&column=1&enginType=' +
                    Config.recommend_engin_type)
        else:
            # No local record: build a new exclusive record and write it to disk.
            title = title.replace('%20', ' ')
            desc = 'engintype:' + title + ' '
            desc += 'localdb:' + title + ' '
            desc += self.kg.getCrossref(
                title, ' '.join(Config.exclusive_crossref_path))
            record = Record('custom-exclusive-' + rID + ' | ' + title + ' | ' +
                            url + ' | ' + desc)
            return self.utils.output2Disk([record], 'exclusive', 'exclusive')

    def check(self, form_dict):
        column = str(form_dict['column']).strip()
        return True
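Below is a hedged sketch of how this extension might be invoked. The form_dict keys mirror those read in excute and check above; the concrete values are purely hypothetical.

ext = Exclusive()
form = {'rID': '42', 'rTitle': 'Some%20Title',
        'url': 'http://example.com/paper',
        'originFileName': 'db/papers/2020',
        'column': '1'}
if ext.check(form):
    print(ext.excute(form))  # a viewer URL, or the path from output2Disk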
Example #5
import subprocess
import json
from extension_manager import ExtensionManager
from utils import Utils
from config import Config
import requests
import datetime

from flask import (Flask, flash, request, redirect, render_template, url_for,
                   session)
from rauth.service import OAuth2Service
from record import Tag, Record
from knowledgegraph import KnowledgeGraph

tag = Tag()
kg = KnowledgeGraph()
# Use your own values in your real application
github = OAuth2Service(
    name='github',
    base_url='https://api.github.com/',
    access_token_url='https://github.com/login/oauth/access_token',
    authorize_url='https://github.com/login/oauth/authorize',
    client_id='38f88bfb83a0908e0103',
    client_secret='7f0c4c5d52972e1d767d0145c6e02ce54342ade3',
)
SECRET_KEY = '\xfb\x12\xdf\xa1@i\xd6>V\xc0\xbb\x8fp\x16#Z\x0b\x81\xeb\x16'
utils = Utils()
app = Flask(__name__)
app.secret_key = SECRET_KEY

extensionManager = ExtensionManager()
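A sketch of the authorization step this service object typically drives, using rauth's standard get_authorize_url; the 'authorized' callback endpoint is an assumption, not part of the excerpt.

@app.route('/login')
def login():
    # Send the user to GitHub's consent page; rauth builds the URL.
    params = {'redirect_uri': url_for('authorized', _external=True),  # hypothetical endpoint
              'response_type': 'code'}
    return redirect(github.get_authorize_url(**params))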
Example #6
import os  # needed for os.getcwd() below


class Exclusive(BaseExtension):

    def __init__(self):
        BaseExtension.__init__(self)
        self.utils = Utils()
        self.kg = KnowledgeGraph()

    def excute(self, form_dict):
        rID = form_dict['rID'].strip()
        title = form_dict['rTitle'].replace('%20', ' ').strip()
        url = form_dict['url'].strip()
        fileName = form_dict['originFileName']
        print(fileName)
        if rID.startswith('loop-h'):
            # History records live in a parallel '-history' file and are
            # matched on the title rather than on the id.
            historyPath = (os.getcwd() + '/extensions/history/data/' +
                           fileName[fileName.rfind('/') + 1:] + '-history')
            print(historyPath)
            r = self.utils.getRecord(title, path=historyPath, matchType=2,
                                     use_cache=False, accurate=False)
        else:
            r = self.utils.getRecord(rID, path=fileName)

        if r is not None and r.get_id().strip() != '':
            if rID.startswith('loop-h'):
                desc = (r.get_describe() + ' ' +
                        self.kg.getCrossref(
                            title, ' '.join(Config.exclusive_crossref_path)))
                record = Record('custom-exclusive-' + rID + ' | ' + title +
                                ' | ' + url + ' | ' + desc)
                localUrl = self.utils.output2Disk(
                    [record], 'exclusive', 'exclusive',
                    append=Config.exclusive_append_mode)
            else:
                # Derive the db folder and record key from the file path.
                db = fileName[fileName.find('db/') + 3:fileName.rfind('/')] + '/'
                key = fileName[fileName.rfind('/') + 1:]
                print(db + ' ' + key)
                localUrl = ('http://' + Config.ip_adress + '/?db=' + db +
                            '&key=' + key + '&filter=' + rID +
                            '&column=1&enginType=' +
                            Config.recommend_engin_type)

            localUrl = localUrl + '&crossrefQuery=""'
            return self.getUrl(r.get_url(), localUrl)
        else:
            # No local record: build a new exclusive record and write it to disk.
            desc = 'engintype:' + title + ' '
            desc += 'localdb:' + title + ' '
            desc += self.kg.getCrossref(
                title, ' '.join(Config.exclusive_crossref_path))
            record = Record('custom-exclusive-' + rID + ' | ' + title + ' | ' +
                            url + ' | ' + desc)
            localUrl = self.utils.output2Disk(
                [record], 'exclusive', 'exclusive',
                append=Config.exclusive_append_mode)
            localUrl = localUrl + '&crossrefQuery=""'
            return self.getUrl(url, localUrl)

    def getUrl(self, url, localUrl):
        # Append the default tab for the first configured domain that
        # matches the remote url.
        for k, v in Config.exclusive_default_tab.items():
            if url.find(k) != -1:
                localUrl += '&extension=' + v
                break
        return localUrl

    def check(self, form_dict):
        column = str(form_dict['column']).strip()
        return True
Example #7
    def __init__(self):
        BaseExtension.__init__(self)
        self.utils = Utils()
        self.kg = KnowledgeGraph()
Example #8
# On top of the imports shown in Example #1, this snippet also needs
# tarfile and pandas' merge/DataFrame for the degree filtering below:
import tarfile

from pandas import DataFrame, merge


def load_wikidatasets(which, limit_=0, data_home=None):
    """Load one of the WikiDataSets datasets. See `here
    <https://arxiv.org/abs/1906.04536>`__ for the paper by Boschin et al. that originally presented the datasets.

    Parameters
    ----------
    which: str
        String indicating which subset of Wikidata should be loaded. Available ones are `humans`, `companies`,
        `animals`, `countries` and `films`.
    limit_: int, optional (default=0)
        This indicates a lower limit on the number of neighbors an entity should have in the graph to be kept.
    data_home: str, optional
        Path to the `torchkge_data` directory (containing data folders). If files are not present on disk in this
        directory, they are downloaded and then placed in the right place.

    Returns
    -------
    kg_train: `torchkge.data.KnowledgeGraph`
    kg_val: `torchkge.data.KnowledgeGraph`
    kg_test: `torchkge.data.KnowledgeGraph`

    """
    assert which in ['humans', 'companies', 'animals', 'countries', 'films']

    if data_home is None:
        data_home = get_data_home()

    data_home = data_home + '/WikiDataSets'
    data_path = data_home + '/' + which
    if not exists(data_path):
        makedirs(data_path, exist_ok=True)
        urlretrieve("https://graphs.telecom-paristech.fr/WikiDataSets/"
                    "{}.tar.gz".format(which),
                    data_home + '/{}.tar.gz'.format(which))

        with tarfile.open(data_home + '/{}.tar.gz'.format(which), 'r') as tf:
            tf.extractall(data_home)
        remove(data_home + '/{}.tar.gz'.format(which))

    df = read_csv(data_path + '/edges.txt',
                  sep='\t',
                  header=1,
                  names=['from', 'to', 'rel'])

    # Number of facts in which each entity appears as head and as tail.
    a = df.groupby('from').count()['rel']
    b = df.groupby('to').count()['rel']

    # Filter out nodes with too few facts
    tmp = merge(
        right=DataFrame(a).reset_index(),
        left=DataFrame(b).reset_index(),
        how='outer',
        right_on='from',
        left_on='to',
    ).fillna(0)

    tmp['rel'] = tmp['rel_x'] + tmp['rel_y']
    tmp = tmp.drop(['from', 'rel_x', 'rel_y'], axis=1)

    tmp = tmp.loc[tmp['rel'] >= limit_]
    df_bis = df.loc[df['from'].isin(tmp['to']) | df['to'].isin(tmp['to'])]

    kg = KnowledgeGraph(df_bis)
    kg_train, kg_val, kg_test = kg.split_kg(share=0.8, validation=True)

    return kg_train, kg_val, kg_test
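A minimal usage sketch; the subset name and the neighbor threshold are illustrative only.

kg_train, kg_val, kg_test = load_wikidatasets('humans', limit_=5)
print(kg_train.n_facts, kg_val.n_facts, kg_test.n_facts)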