def _display_assertions(db, dataset=None):
    '''
    Connect to `db` and print every assertion with its confidence score
    and the factors of each ReasonConjunction that targets it.

    db: database name handed to conceptdb.connect_to_mongodb
    dataset: when not None, only assertions filtered by this dataset
             are printed

    Raises AssertionError if a reason returned for an assertion does not
    have that assertion's name as its target.
    '''
    conceptdb.connect_to_mongodb(db)
    print("Before the merge, db %s has the following assertions: " % db)
    if dataset is None:
        assertions = Assertion.objects
    else:
        assertions = Assertion.objects.filter(dataset=dataset)
    for assertion in assertions:
        print("assertion: %s" % assertion)
        print(" confidence score: %s" % assertion.confidence)
        for reason in ReasonConjunction.objects.filter(target=assertion.name):
            print(" reason: %s" % reason.factors)
            # Sanity check: the query must only hand back reasons that
            # really point at this assertion.
            assert reason.target == assertion.name


def testmerge_display(db1='test1', db2='test2', dataset=None):
    '''
    Print the pre-merge contents of two databases, db1 first, then db2.

    db1, db2: database names to connect to and display
    dataset: optional dataset used to restrict the displayed assertions;
             None shows every assertion
    '''
    # The original repeated the same display loop four times (two DBs x
    # with/without dataset); one helper now covers all four cases.
    for db in (db1, db2):
        _display_assertions(db, dataset)
def testmerge_display(db1='test1', db2='test2', dataset=None):
    '''
    Show what each database holds before the merge.

    For db1 and then db2: connect, print a header, and print every
    assertion with its confidence score and the factors of the reasons
    that target it.

    db1, db2: database names passed to conceptdb.connect_to_mongodb
    dataset: if not None, only assertions filtered by this dataset are
             shown

    Raises AssertionError when a reason fetched for an assertion has a
    different target than that assertion's name.
    '''
    # One loop over both databases replaces four copies of the same code.
    for db_name in (db1, db2):
        conceptdb.connect_to_mongodb(db_name)
        print("Before the merge, db %s has the following assertions: "%db_name)
        # `is None` instead of `== None`: identity is the intended test.
        if dataset is None:
            shown = Assertion.objects
        else:
            shown = Assertion.objects.filter(dataset=dataset)
        for item in shown:
            print("assertion: %s"%item)
            print(" confidence score: %s"%item.confidence)
            for support in ReasonConjunction.objects.filter(target=item.name):
                print(" reason: %s"%support.factors)
                assert support.target == item.name
from conceptdb.metadata import Dataset
import conceptdb
from conceptdb.assertion import Sentence

conceptdb.connect_to_mongodb('test')


def test_sentence():
    '''
    Make the same sentence twice -- once naming the dataset by string,
    once passing the Dataset object -- and check both calls yield a
    sentence with an id, and that the two ids are equal.
    '''
    sentence_text = "This is a test sentence."
    test_dataset = Dataset.create(language='en', name='/data/test')

    # First pass: refer to the dataset by its name.
    by_name = Sentence.make('/data/test', sentence_text)

    # An id is expected once the sentence has been stored.
    assert by_name.id is not None

    # Touch each attribute so a broken accessor raises here.
    for attribute in ('text', 'words', 'dataset',
                      'derived_assertions', 'confidence'):
        getattr(by_name, attribute)

    # Second pass: same text, but hand over the Dataset object itself.
    by_object = Sentence.make(test_dataset, sentence_text)
    assert by_object.id is not None

    # Both calls must resolve to the very same sentence.
    assert by_name.id == by_object.id
] def import_sentence(sent): dataset = Dataset.make(DATASET_ROOT + sent.language.id, sent.language.id) activity = sent.activity.name root = dataset.get_root_reason() site = root.derived_reason('/site/omcs') act_reason = site.derived_reason(ACTIVITY_ROOT + activity.replace(' ', '_')) contrib_reason = site.derived_reason(CONTRIBUTOR_ROOT + sent.creator.username) justification = [act_reason, contrib_reason] newsent = assertion.Sentence.make(dataset, sent.text, justification) log.info(str(newsent)) def import_sentences(): print "importing sentences." sentences = Sentence.objects.filter(score__gt=0, language__id='en') print len(sentences) for sent in sentences: activity = sent.activity.name if activity in BAD_ACTIVITIES: continue import_sentence(sent) if __name__ == '__main__': conceptdb.connect_to_mongodb('conceptdb') import_sentences()
from piston.utils import throttle, rc from piston.authentication import HttpBasicAuthentication from conceptdb.assertion import Assertion, Sentence, Expression from conceptdb.metadata import Dataset from conceptdb.justify import ReasonConjunction from conceptdb.freebase_imports import MQLQuery from conceptdb import ConceptDBDocument from conceptdb.db_merge import merge import conceptdb from mongoengine.queryset import DoesNotExist from mongoengine.base import ValidationError from csc.conceptnet.models import User basic_auth = HttpBasicAuthentication() conceptdb.connect_to_mongodb('test') #NOTE: change when not testing class ConceptDBHandler(BaseHandler): """The ConceptDBHandler deals with all accesses to the conceptdb from the api. A GET to it can return a dataset, assertion, or reason. Searching for a concept will return the top ranked assertions that the concept is part of. A POST to it can create an assertion or vote on one.""" allowed_methods = ('GET','POST') @throttle(600,60,'read') def read(self, request, obj_url): obj_url = '/'+obj_url if obj_url.startswith('/data'):#try to find matching dataset return self.datasetLookup(obj_url)
def main():
    '''
    Connect to the 'conceptdb' database, then import activities,
    contributors and assertions for English, in that order.
    '''
    language = 'en'
    conceptdb.connect_to_mongodb('conceptdb')
    import_activities(language)
    import_contributors(language)
    import_assertions(language)
if count <100: print count count += 1 continue else: print line.split()[0] q = MQLQuery.make({'mid':line.split()[0]},['*']) q.get_results('/data/freebase', 'nholm', 1, None, True, 'mid') count += 1 if count > 200: break dump.close() if __name__ == "__main__": conceptdb.connect_to_mongodb('conceptdb') print len(Assertion.objects) prev_len = len(Assertion.objects) test_datadumpread("freebase-simple-topic-dump.tsv") #test_import_traversing() print '%d assertions made.'%(len(Assertion.objects)-prev_len) #for a in Assertion.objects: # print a.relation # print a.arguments
def _select_assertions(dataset):
    '''
    Return the Assertion query for the currently connected database,
    filtered by `dataset` unless it is None.
    '''
    if dataset is None:
        return Assertion.objects
    return Assertion.objects.filter(dataset=dataset)


def _assert_counterparts(source, other):
    '''
    Assert that every element of `source` that is not contained in
    `other` is matched by at least one element of `other` according to
    assertion_check.
    '''
    for candidate in source:
        if candidate in other:
            continue
        assert any(assertion_check(candidate, match) for match in other), \
            "no matching assertion found for %s" % candidate


def _assert_reason_targets(dataset):
    '''
    For each selected assertion in the connected database, assert that
    every ReasonConjunction fetched by its name targets that name.
    '''
    for assertion in _select_assertions(dataset):
        for reason in ReasonConjunction.objects.filter(target=assertion.name):
            assert reason.target == assertion.name


def _drop_test_collections():
    '''Drop the Assertion, Dataset and ReasonConjunction collections.'''
    Assertion.drop_collection()
    Dataset.drop_collection()
    ReasonConjunction.drop_collection()


def testmerge_check(db1='test1', db2='test2', dataset=None):
    '''
    Check post-merge elements, make sure they match

    db1, db2: names of the two merged databases
    dataset: if not None, only assertions filtered by this dataset are
             compared

    Side effect: after each database's reasons have been checked, its
    Assertion, Dataset and ReasonConjunction collections are dropped.

    Raises AssertionError if an assertion has no counterpart in the
    other database, or a reason does not target the assertion it was
    fetched for.
    '''
    print("Testing Assertion objects")
    # Snapshot each side into a list while connected to it. The original
    # re-ran list(...) on the query for every element being compared and
    # only evaluated the db1 query after the connection had moved to db2.
    conceptdb.connect_to_mongodb(db1)
    db1_assertions = list(_select_assertions(dataset))
    conceptdb.connect_to_mongodb(db2)
    db2_assertions = list(_select_assertions(dataset))

    _assert_counterparts(db1_assertions, db2_assertions)
    _assert_counterparts(db2_assertions, db1_assertions)
    print("Assertion test PASSED")

    print("Testing ReasonConjunction objects")
    conceptdb.connect_to_mongodb(db1)
    _assert_reason_targets(dataset)
    _drop_test_collections()

    conceptdb.connect_to_mongodb(db2)
    _assert_reason_targets(dataset)
    print("ReasonConjunction test PASSED")
    _drop_test_collections()
def testmerge_check(db1='test1', db2='test2', dataset=None):
    '''
    Check post-merge elements, make sure they match

    db1, db2: names of the two merged databases
    dataset: if not None, restricts the compared assertions to those
             filtered by this dataset

    Each database's Assertion, Dataset and ReasonConjunction collections
    are dropped once its reasons have been verified.

    Raises AssertionError when an assertion lacks a counterpart in the
    other database or a reason's target differs from the assertion it
    was looked up for.
    '''
    def fetch_assertions(db):
        # Connect and materialize right away, so the result is a plain
        # list taken while `db` is the active connection.
        conceptdb.connect_to_mongodb(db)
        if dataset is None:
            return list(Assertion.objects)
        return list(Assertion.objects.filter(dataset=dataset))

    print("Testing Assertion objects")
    db1_assertions = fetch_assertions(db1)
    db2_assertions = fetch_assertions(db2)
    # Same check in both directions; the lists are built once instead of
    # being rebuilt for every element compared.
    for ours, theirs in ((db1_assertions, db2_assertions),
                         (db2_assertions, db1_assertions)):
        for unmatched in [a for a in ours if a not in theirs]:
            found = any(assertion_check(unmatched, other) for other in theirs)
            assert found, "no matching assertion found for %s" % unmatched
    print("Assertion test PASSED")

    print("Testing ReasonConjunction objects")
    databases = (db1, db2)
    for position, db in enumerate(databases):
        for assertion in fetch_assertions(db):
            for reason in ReasonConjunction.objects.filter(target=assertion.name):
                assert reason.target == assertion.name
        # Announce success after the last database has been verified,
        # before its collections are dropped (same order as before).
        if position == len(databases) - 1:
            print("ReasonConjunction test PASSED")
        Assertion.drop_collection()
        Dataset.drop_collection()
        ReasonConjunction.drop_collection()
def merge(db1, db2, dataset=None):
    '''
    Merge the assertions of two databases into each other, together
    with the reasons that point to those assertions.

    The function works in two phases. First it loops over both DBs to
    find assertions that are not present in the other one and queues
    them in db1_tobeadded / db2_tobeadded. Then it connects to each DB
    in turn and creates the queued assertions and their supporting
    factors there.

    Queue entries have one of two shapes:
      (assertion, reasons)              -- assertion missing on the other side
      (None, (reasons, existing))       -- assertion_check found an equivalent
                                           assertion `existing` on the other side

    db1, db2: database names passed to conceptdb.connect_to_mongodb
    dataset: if not None, only assertions filtered by this dataset are
             considered

    Returns the tuple (db1_assertions, db2_assertions) built at the
    start of the function.
    '''
    db1_tobeadded = []
    db2_tobeadded = []
    # Build one assertion query per database, each while that database
    # is the connected one.
    if dataset == None:
        conceptdb.connect_to_mongodb(db1)
        db1_assertions = Assertion.objects
        conceptdb.connect_to_mongodb(db2)
        db2_assertions = Assertion.objects
    else:
        conceptdb.connect_to_mongodb(db1)
        db1_assertions = Assertion.objects.filter(dataset=dataset)
        conceptdb.connect_to_mongodb(db2)
        db2_assertions = Assertion.objects.filter(dataset=dataset)

    # Looping to find assertions in DB1 that are not in DB2
    # (list(db2_assertions) is rebuilt for every a1 tested here)
    for db1_a in [a1 for a1 in list(db1_assertions) if a1 not in list(db2_assertions)]:
        # Reconnect to DB1 so the reason lookups below run against it
        conceptdb.connect_to_mongodb(db1)
        # New assertions, along with the reasons
        db2_tobeadded.append((db1_a, ReasonConjunction.objects.filter(target=db1_a.name)))
        # Check that each assertion does not exist in the DB with a different Assertion ID
        for db2_check in list(db2_assertions):
            if assertion_check(db1_a, db2_check):
                # Do not add multiple assertions
                db2_tobeadded.pop()
                # But DO add new reasons that point to existing assertions
                # NOTE(review): filter() presumably returns a query object and
                # never None, so this guard looks always true -- confirm.
                if ReasonConjunction.objects.filter(target=db1_a.name) is not None:
                    db2_tobeadded.append((None, (ReasonConjunction.objects.filter(target=db1_a.name), db2_check)))
                # Only the first equivalent assertion is used
                break

    # Looping to find assertions in DB2 that are not in DB1
    # (mirror image of the loop above)
    for db2_a in [a2 for a2 in list(db2_assertions) if a2 not in list(db1_assertions)]:
        conceptdb.connect_to_mongodb(db2)
        # New assertions, along with the reasons
        db1_tobeadded.append((db2_a, ReasonConjunction.objects.filter(target=db2_a.name)))
        # Check that each assertion does not exist in the DB with a different Assertion ID
        for db1_check in list(db1_assertions):
            if assertion_check(db2_a, db1_check):
                # Do not add multiple assertions
                db1_tobeadded.pop()
                # But DO add new reasons that point to existing assertions
                # NOTE(review): same presumably-always-true guard as above.
                if ReasonConjunction.objects.filter(target=db2_a.name) is not None:
                    db1_tobeadded.append((None, (ReasonConjunction.objects.filter(target=db2_a.name), db1_check)))
                break

    ''' Step through db1_tobeadded and db2_tobeadded, lists of elements that have to be added from each DB to the other, and add all of the assertions and corresponding reasons to the DBs '''

    # Adding to DB1
    conceptdb.connect_to_mongodb(db1)
    for (add1,rel1) in db1_tobeadded:
        if add1 == None:
            # Entry shape (None, (reasons, existing)): the assertion already
            # exists in DB1 as rel1[1]; only its reasons are candidates.
            for r1 in list(rel1[0]):
                # Rebuild the factor list, creating a new assertion through
                # db1_assertions for every factor that is itself an Assertion.
                factors_db1 = []
                for factor in r1.factors:
                    if type(factor) == Assertion:
                        factors_db1.append(db1_assertions.create(
                            dataset=factor.dataset,
                            relation=factor.relation,
                            polarity=factor.polarity,
                            argstr=factor.argstr,
                            context=factor.context,
                            complete=1))
                    else:
                        factors_db1.append(factor)
                # NOTE(review): comparing a filter() result to None with ==
                # is presumably always False, in which case add_support()
                # below never runs -- confirm against the query API. Also,
                # target is filtered by .name earlier but by the assertion
                # object here -- confirm which is intended.
                if ReasonConjunction.objects.filter(target=rel1[1],factors=factors_db1,weight=r1.weight) ==None:
                    rel1[1].add_support(factors_db1)
            continue
        # Entry shape (assertion, reasons): create the missing assertion,
        # then attach each reason's factors as support.
        ass1 = db1_assertions.create(
            dataset=add1.dataset,
            relation=add1.relation,
            polarity=add1.polarity,
            argstr=add1.argstr,
            context=add1.context,
            complete=1
        )
        for r1 in list(rel1):
            factors_db1 = []
            for factor in r1.factors:
                if type(factor) == Assertion:
                    factors_db1.append(db1_assertions.create(
                        dataset=factor.dataset,
                        relation=factor.relation,
                        polarity=factor.polarity,
                        argstr=factor.argstr,
                        context=factor.context,
                        complete=1))
                else:
                    factors_db1.append(factor)
            ass1.add_support(factors_db1)

    # Adding to DB2
    # (mirror image of the DB1 section above)
    conceptdb.connect_to_mongodb(db2)
    for (add2,rel2) in db2_tobeadded:
        if add2 == None:
            for r2 in list(rel2[0]):
                factors_db2 = []
                for factor in r2.factors:
                    if type(factor) == Assertion:
                        factors_db2.append(db2_assertions.create(
                            dataset=factor.dataset,
                            relation=factor.relation,
                            polarity=factor.polarity,
                            argstr=factor.argstr,
                            context=factor.context,
                            complete=1))
                    else:
                        factors_db2.append(factor)
                # NOTE(review): same presumably-always-False comparison as in
                # the DB1 section -- confirm.
                if ReasonConjunction.objects.filter(target=rel2[1],factors=factors_db2,weight=r2.weight) ==None:
                    rel2[1].add_support(factors_db2)
            continue
        ass2 = db2_assertions.create(
            dataset=add2.dataset,
            relation=add2.relation,
            polarity=add2.polarity,
            argstr=add2.argstr,
            context=add2.context,
            complete=1
        )
        for r2 in list(rel2):
            factors_db2 = []
            for factor in r2.factors:
                if type(factor) == Assertion:
                    factors_db2.append(db2_assertions.create(
                        dataset=factor.dataset,
                        relation=factor.relation,
                        polarity=factor.polarity,
                        argstr=factor.argstr,
                        context=factor.context,
                        complete=1))
                else:
                    factors_db2.append(factor)
            ass2.add_support(factors_db2)

    return (db1_assertions, db2_assertions)
from piston.utils import throttle, rc from piston.authentication import HttpBasicAuthentication from conceptdb.assertion import Assertion, Sentence, Expression from conceptdb.metadata import Dataset from conceptdb.justify import ReasonConjunction from conceptdb.freebase_imports import MQLQuery from conceptdb import ConceptDBDocument from conceptdb.db_merge import merge import conceptdb from mongoengine.queryset import DoesNotExist from mongoengine.base import ValidationError from csc.conceptnet.models import User basic_auth = HttpBasicAuthentication() conceptdb.connect_to_mongodb('test') #NOTE: change when not testing class ConceptDBHandler(BaseHandler): """The ConceptDBHandler deals with all accesses to the conceptdb from the api. A GET to it can return a dataset, assertion, or reason. Searching for a concept will return the top ranked assertions that the concept is part of. A POST to it can create an assertion or vote on one.""" allowed_methods = ('GET', 'POST') @throttle(600, 60, 'read') def read(self, request, obj_url): obj_url = '/' + obj_url if obj_url.startswith('/data'): #try to find matching dataset