# Imports used by the dataset loaders below. KnowledgeGraph and
# get_data_home come from torchkge's own modules.
import shutil
import tarfile
import zipfile
from os import makedirs, remove
from os.path import exists
from urllib.request import urlretrieve

from pandas import DataFrame, concat, merge, read_csv


def load_fb15k(data_home=None):
    """Load fb15k dataset. See `here
    <https://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data>`__
    for paper by Bordes et al. originally presenting the dataset.

    Parameters
    ----------
    data_home: str, optional
        Path to the `torchkge_data` directory (containing data folders). If
        files are not present on disk in this directory, they are downloaded
        and then placed in the right place.

    Returns
    -------
    kg_train: `torchkge.data.KnowledgeGraph`
    kg_val: `torchkge.data.KnowledgeGraph`
    kg_test: `torchkge.data.KnowledgeGraph`

    """
    if data_home is None:
        data_home = get_data_home()
    data_path = data_home + '/FB15k'

    # Download and extract the archive only if it is not already on disk.
    if not exists(data_path):
        makedirs(data_path, exist_ok=True)
        urlretrieve("https://graphs.telecom-paristech.fr/datasets/FB15k.zip",
                    data_home + '/FB15k.zip')
        with zipfile.ZipFile(data_home + '/FB15k.zip', 'r') as zip_ref:
            zip_ref.extractall(data_home)
        remove(data_home + '/FB15k.zip')
        shutil.rmtree(data_home + '/__MACOSX')

    df1 = read_csv(data_path + '/freebase_mtr100_mte100-train.txt',
                   sep='\t', header=None, names=['from', 'rel', 'to'])
    df2 = read_csv(data_path + '/freebase_mtr100_mte100-valid.txt',
                   sep='\t', header=None, names=['from', 'rel', 'to'])
    df3 = read_csv(data_path + '/freebase_mtr100_mte100-test.txt',
                   sep='\t', header=None, names=['from', 'rel', 'to'])
    df = concat([df1, df2, df3])
    kg = KnowledgeGraph(df)

    return kg.split_kg(sizes=(len(df1), len(df2), len(df3)))
def load_fb15k237(data_home=None):
    """Load fb15k237 dataset. See `here
    <https://www.aclweb.org/anthology/D15-1174/>`__ for paper by Toutanova
    et al. originally presenting the dataset.

    Parameters
    ----------
    data_home: str, optional
        Path to the `torchkge_data` directory (containing data folders). If
        files are not present on disk in this directory, they are downloaded
        and then placed in the right place.

    Returns
    -------
    kg_train: `torchkge.data.KnowledgeGraph`
    kg_val: `torchkge.data.KnowledgeGraph`
    kg_test: `torchkge.data.KnowledgeGraph`

    """
    if data_home is None:
        data_home = get_data_home()
    data_path = data_home + '/FB15k237'

    if not exists(data_path):
        makedirs(data_path, exist_ok=True)
        urlretrieve(
            "https://graphs.telecom-paristech.fr/datasets/FB15k237.zip",
            data_home + '/FB15k237.zip')
        with zipfile.ZipFile(data_home + '/FB15k237.zip', 'r') as zip_ref:
            zip_ref.extractall(data_home)
        remove(data_home + '/FB15k237.zip')
        shutil.rmtree(data_home + '/__MACOSX')

    df1 = read_csv(data_path + '/train.txt', sep='\t', header=None,
                   names=['from', 'rel', 'to'])
    df2 = read_csv(data_path + '/valid.txt', sep='\t', header=None,
                   names=['from', 'rel', 'to'])
    df3 = read_csv(data_path + '/test.txt', sep='\t', header=None,
                   names=['from', 'rel', 'to'])
    df = concat([df1, df2, df3])
    kg = KnowledgeGraph(df)

    return kg.split_kg(sizes=(len(df1), len(df2), len(df3)))
def load_wn18(data_home=None):
    """Load wn18 dataset. See `here
    <https://papers.nips.cc/paper/5071-translating-embeddings-for-modeling-multi-relational-data>`__
    for paper by Bordes et al. originally presenting the dataset.

    Parameters
    ----------
    data_home: str, optional
        Path to the `torchkge_data` directory (containing data folders). If
        files are not present on disk in this directory, they are downloaded
        and then placed in the right place.

    Returns
    -------
    kg_train: `torchkge.data.KnowledgeGraph`
    kg_val: `torchkge.data.KnowledgeGraph`
    kg_test: `torchkge.data.KnowledgeGraph`

    """
    if data_home is None:
        data_home = get_data_home()
    data_path = data_home + '/WN18'

    if not exists(data_path):
        makedirs(data_path, exist_ok=True)
        urlretrieve("https://graphs.telecom-paristech.fr/datasets/WN18.zip",
                    data_home + '/WN18.zip')
        with zipfile.ZipFile(data_home + '/WN18.zip', 'r') as zip_ref:
            zip_ref.extractall(data_home)
        remove(data_home + '/WN18.zip')
        shutil.rmtree(data_home + '/__MACOSX')

    df1 = read_csv(data_path + '/wordnet-mlj12-train.txt', sep='\t',
                   header=None, names=['from', 'rel', 'to'])
    df2 = read_csv(data_path + '/wordnet-mlj12-valid.txt', sep='\t',
                   header=None, names=['from', 'rel', 'to'])
    df3 = read_csv(data_path + '/wordnet-mlj12-test.txt', sep='\t',
                   header=None, names=['from', 'rel', 'to'])
    df = concat([df1, df2, df3])
    kg = KnowledgeGraph(df)

    return kg.split_kg(sizes=(len(df1), len(df2), len(df3)))
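# Hedged usage sketch for the three loaders above. Each returns a
# (kg_train, kg_val, kg_test) split whose sizes match the original
# train/valid/test files. The `n_facts` attribute is assumed from
# torchkge's KnowledgeGraph API; check your installed version.
kg_train, kg_val, kg_test = load_fb15k()
print(kg_train.n_facts, kg_val.n_facts, kg_test.n_facts)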
class Exclusive(BaseExtension):

    def __init__(self):
        BaseExtension.__init__(self)
        self.utils = Utils()
        self.kg = KnowledgeGraph()

    # Note: 'excute' (sic) is kept as-is; the extension framework appears
    # to dispatch extensions through this method name.
    def excute(self, form_dict):
        rID = form_dict['rID'].strip()
        title = form_dict['rTitle'].strip()
        #fileName = form_dict['fileName']
        url = form_dict['url'].strip()
        fileName = form_dict['originFileName']
        print fileName

        r = self.utils.getRecord(rID, path=fileName)
        if r is not None and r.get_id().strip() != '':
            # The record exists locally: build a query URL from the db/key
            # pair parsed out of the record's file path.
            db = fileName[fileName.find('db/') + 3:fileName.rfind('/')] + '/'
            key = fileName[fileName.rfind('/') + 1:]
            print db + ' ' + key
            #return 'http://' + Config.ip_adress + '/?db=' + db + '&key=' + key + '&filter=' + title.replace('...', '') + '&column=1'
            return ('http://' + Config.ip_adress + '/?db=' + db + '&key=' +
                    key + '&filter=' + rID + '&column=1&enginType=' +
                    Config.recommend_engin_type)
        else:
            # Unknown record: build a new one from the title, enrich it with
            # cross-references from the knowledge graph and write it to disk.
            title = title.replace('%20', ' ')
            desc = 'engintype:' + title + ' '
            desc += 'localdb:' + title + ' '
            desc += self.kg.getCrossref(
                title, ' '.join(Config.exclusive_crossref_path))
            record = Record('custom-exclusive-' + rID + ' | ' + title +
                            ' | ' + url + ' | ' + desc)
            return self.utils.output2Disk([record], 'exclusive', 'exclusive')

        #if fileName.find("/custom") != -1:
        #    fileName = form_dict['originFileName']
        #if form_dict.has_key('fileName') and form_dict['fileName'] != '':
        #    fileName = form_dict['fileName']

    def check(self, form_dict):
        column = str(form_dict['column']).strip()
        #print 'exclusive check column ' + column
        return True
import subprocess
import json
import datetime

import requests
from flask import (Flask, flash, request, redirect, render_template,
                   url_for, session)
from rauth.service import OAuth2Service

from extension_manager import ExtensionManager
from utils import Utils
from config import Config
from record import Tag, Record
from knowledgegraph import KnowledgeGraph

tag = Tag()
kg = KnowledgeGraph()

# Use your own values in your real application
github = OAuth2Service(
    name='github',
    base_url='https://api.github.com/',
    access_token_url='https://github.com/login/oauth/access_token',
    authorize_url='https://github.com/login/oauth/authorize',
    client_id='38f88bfb83a0908e0103',
    client_secret='7f0c4c5d52972e1d767d0145c6e02ce54342ade3',
)

SECRET_KEY = '\xfb\x12\xdf\xa1@i\xd6>V\xc0\xbb\x8fp\x16#Z\x0b\x81\xeb\x16'

utils = Utils()
app = Flask(__name__)
app.secret_key = SECRET_KEY
extensionManager = ExtensionManager()
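# Hedged sketch of how the OAuth2Service above is typically wired into
# Flask routes with rauth. The route names ('login', 'authorized') are
# illustrative assumptions, not part of the original module.
@app.route('/login')
def login():
    # Redirect the user to GitHub's authorization page.
    redirect_uri = url_for('authorized', _external=True)
    return redirect(github.get_authorize_url(redirect_uri=redirect_uri))


@app.route('/authorized')
def authorized():
    if 'code' not in request.args:
        flash('Authorization failed.')
        return redirect('/')
    # Exchange the temporary code for an access token and fetch the user;
    # rauth adds client_id/client_secret to the token request itself.
    auth_session = github.get_auth_session(
        data={'code': request.args['code']})
    user = auth_session.get('user').json()
    session['login'] = user.get('login')
    return redirect('/')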
import os  # needed for os.getcwd() below; the original snippet omits it


class Exclusive(BaseExtension):

    def __init__(self):
        BaseExtension.__init__(self)
        self.utils = Utils()
        self.kg = KnowledgeGraph()

    def excute(self, form_dict):
        rID = form_dict['rID'].strip()
        title = form_dict['rTitle'].replace('%20', ' ').strip()
        #fileName = form_dict['fileName']
        url = form_dict['url'].strip()
        fileName = form_dict['originFileName']
        print fileName

        if rID.startswith('loop-h'):
            # History records are matched by title against the per-file
            # history database kept by the history extension.
            historyPath = (os.getcwd() + '/extensions/history/data/' +
                           fileName[fileName.rfind('/') + 1:] + '-history')
            print historyPath
            r = self.utils.getRecord(title, path=historyPath, matchType=2,
                                     use_cache=False, accurate=False)
        else:
            r = self.utils.getRecord(rID, path=fileName)

        if r is not None and r.get_id().strip() != '':
            if rID.startswith('loop-h'):
                # Known history record: enrich its description with
                # cross-references and write it to disk.
                desc = r.get_describe() + ' ' + self.kg.getCrossref(
                    title, ' '.join(Config.exclusive_crossref_path))
                record = Record('custom-exclusive-' + rID + ' | ' + title +
                                ' | ' + url + ' | ' + desc)
                localUrl = self.utils.output2Disk(
                    [record], 'exclusive', 'exclusive',
                    append=Config.exclusive_append_mode)
            else:
                # Known local record: build a query URL from the db/key pair
                # parsed out of the record's file path.
                db = fileName[fileName.find('db/') + 3:fileName.rfind('/')] + '/'
                key = fileName[fileName.rfind('/') + 1:]
                print db + ' ' + key
                #return 'http://' + Config.ip_adress + '/?db=' + db + '&key=' + key + '&filter=' + title.replace('...', '') + '&column=1'
                localUrl = ('http://' + Config.ip_adress + '/?db=' + db +
                            '&key=' + key + '&filter=' + rID +
                            '&column=1&enginType=' +
                            Config.recommend_engin_type)
            localUrl = localUrl + '&crossrefQuery=""'
            return self.getUrl(r.get_url(), localUrl)
        else:
            # Unknown record: build a new one from the title, enrich it with
            # cross-references and write it to disk.
            desc = 'engintype:' + title + ' '
            desc += 'localdb:' + title + ' '
            desc += self.kg.getCrossref(
                title, ' '.join(Config.exclusive_crossref_path))
            record = Record('custom-exclusive-' + rID + ' | ' + title +
                            ' | ' + url + ' | ' + desc)
            localUrl = self.utils.output2Disk(
                [record], 'exclusive', 'exclusive',
                append=Config.exclusive_append_mode)
            localUrl = localUrl + '&crossrefQuery=""'
            return self.getUrl(url, localUrl)

        #if fileName.find("/custom") != -1:
        #    fileName = form_dict['originFileName']
        #if form_dict.has_key('fileName') and form_dict['fileName'] != '':
        #    fileName = form_dict['fileName']

    def getUrl(self, url, localUrl):
        # Append the default tab extension configured for the matching site.
        for k, v in Config.exclusive_default_tab.items():
            if url.find(k) != -1:
                localUrl += '&extension=' + v
                break
        return localUrl

    def check(self, form_dict):
        column = str(form_dict['column']).strip()
        #print 'exclusive check column ' + column
        return True
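# Hedged sketch of how the extension above is presumably driven: the
# framework passes the posted form fields to check()/excute() as a dict.
# All values below are made-up placeholders; only the key names come from
# the code above.
form_dict = {
    'rID': 'custom-42',
    'rTitle': 'some%20record%20title',
    'url': 'http://example.com/page',
    'originFileName': '/path/to/db/somedb/somekey',
    'column': '1',
}
ext = Exclusive()
if ext.check(form_dict):
    print ext.excute(form_dict)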
def load_wikidatasets(which, limit_=0, data_home=None):
    """Load WikiDataSets dataset. See `here
    <https://arxiv.org/abs/1906.04536>`__ for paper by Boschin et al.
    originally presenting the dataset.

    Parameters
    ----------
    which: str
        String indicating which subset of Wikidata should be loaded.
        Available ones are `humans`, `companies`, `animals`, `countries`
        and `films`.
    limit_: int, optional (default=0)
        This indicates a lower limit on the number of neighbors an entity
        should have in the graph to be kept.
    data_home: str, optional
        Path to the `torchkge_data` directory (containing data folders). If
        files are not present on disk in this directory, they are downloaded
        and then placed in the right place.

    Returns
    -------
    kg_train: `torchkge.data.KnowledgeGraph`
    kg_val: `torchkge.data.KnowledgeGraph`
    kg_test: `torchkge.data.KnowledgeGraph`

    """
    assert which in ['humans', 'companies', 'animals', 'countries', 'films']

    if data_home is None:
        data_home = get_data_home()
    data_home = data_home + '/WikiDataSets'
    data_path = data_home + '/' + which

    if not exists(data_path):
        makedirs(data_path, exist_ok=True)
        urlretrieve(
            "https://graphs.telecom-paristech.fr/WikiDataSets/"
            "{}.tar.gz".format(which),
            data_home + '/{}.tar.gz'.format(which))
        with tarfile.open(data_home + '/{}.tar.gz'.format(which), 'r') as tf:
            tf.extractall(data_home)
        remove(data_home + '/{}.tar.gz'.format(which))

    df = read_csv(data_path + '/edges.txt', sep='\t', header=1,
                  names=['from', 'to', 'rel'])

    # Count the facts each entity is involved in, as head and as tail.
    a = df.groupby('from').count()['rel']
    b = df.groupby('to').count()['rel']

    # Filter out nodes with too few facts: keep only entities appearing in
    # at least `limit_` facts overall.
    tmp = merge(right=DataFrame(a).reset_index(),
                left=DataFrame(b).reset_index(),
                how='outer', right_on='from', left_on='to').fillna(0)
    tmp['rel'] = tmp['rel_x'] + tmp['rel_y']
    tmp = tmp.drop(['from', 'rel_x', 'rel_y'], axis=1)
    tmp = tmp.loc[tmp['rel'] >= limit_]

    df_bis = df.loc[df['from'].isin(tmp['to']) | df['to'].isin(tmp['to'])]

    kg = KnowledgeGraph(df_bis)
    kg_train, kg_val, kg_test = kg.split_kg(share=0.8, validation=True)

    return kg_train, kg_val, kg_test
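# Hedged usage sketch: load the 'humans' subset, keeping only entities
# involved in at least 15 facts (limit_ semantics as documented above).
# The `n_facts` attribute is assumed from torchkge's KnowledgeGraph API.
kg_train, kg_val, kg_test = load_wikidatasets('humans', limit_=15)
print(kg_train.n_facts, kg_val.n_facts, kg_test.n_facts)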