コード例 #1
0
    def run(self):
        for DATASET in self.DATASETS:
            print "==== Importing Dataset %s === " % DATASET["name"]
            XRO = XMLReader.from_dataset(DATASET)
            stats = XRO.get_stats()
            PRINT( stats )
            LOGCSV.writerow(["Dataset Stats Keys", "", XRO.name] + stats.keys())
            LOGCSV.writerow(["Dataset Stats Values", "", XRO.name] + stats.values())
            
            for DBO in self.DBOS:                
                print "Current Db: ", DBO.info
                print "* reset tables"
                DBO.reset()
                
                print "* import dataset"
                DBO.import_XML(XRO)

            print "==== Retrieval Benchmark ===="
            print "* Generating samples"            
            warmup_sample = XRO.get_thread_sample(RUNS)
            thread_sample = XRO.get_thread_sample(RUNS)

            self.LOG_THREADS(thread_sample)

            LOGCSV.writerow(["Thread Lengths", "", XRO.name] +  [ XRO.get_thread_length(thread) for thread in thread_sample] )

            print "* Average Thread Length:", sum([ XRO.get_thread_length(thread) for thread in thread_sample])/float(RUNS)

            for DBO in self.DBOS:
                print "* Current Db: ", DBO.info
                # Warmup run
                print "* warming up caches"
                self.retrieval_benchmark(DBO, warmup_sample, silent = True)
                
                # Benchmark
                results = self.retrieval_benchmark(DBO, thread_sample)
                LOGCSV.writerow(["Retrieval Ticks", DBO.info, DBO.XRO.name] +  results )

            print "=== Edit Post Benchmark ==="
            print "* Generating samples"
            post_sample   = XRO.get_post_sample(1000)
            for DBO in self.DBOS:
                self.edit_post_benchmark(DBO, post_sample)
                
            #print "* Benchmark Add Post"
            #self.add_post_benchmark(DBO)
                
        
        for DBO in DBOS:
            DBO.close()
コード例 #2
0
 def __init__(self, host = 'localhost', port = 27017):
     """Open a MongoDB client and bind the database/collection handles.

     host, port -- passed straight to ``pymongo.MongoClient``.

     Sets ``self.con`` (client), ``self.db`` (database named by
     ``self.DB_NAME``), ``self.tc`` / ``self.uc`` (collections named by
     ``self.COL_THREAD`` / ``self.COL_USER``) and ``self.XRO`` (a fresh
     ``XMLReader``).
     """
     client = pymongo.MongoClient(host, port)
     database = client[self.DB_NAME]

     self.con = client
     self.db = database
     self.tc = database[self.COL_THREAD]
     self.uc = database[self.COL_USER]
     self.XRO = XMLReader()
コード例 #3
0
class MongoControls:
    # We follow the tutorial
    # http://api.mongodb.org/python/2.4.1/tutorial.html
    
    # Remark:
    # documents are members of collections
    # collections are members of databases

    info = "Mongo"

    DB_NAME = "discuss"
    COL_THREAD = "threads"
    COL_USER   = "******"
    
    def __init__(self, host = 'localhost', port = 27017):
        self.con = pymongo.MongoClient(host, port)
        self.db  = self.con[self.DB_NAME]
        self.tc  = self.db[self.COL_THREAD]
        self.uc  = self.db[self.COL_USER]
        self.XRO  = XMLReader()
    
    def close(self):
        pass
    
    def reset(self):
        self.tc.drop()
        self.uc.drop()

    def insert_thread(self, data):
        # Insert data into thread collection
        # returns id
        try:
            id = self.tc.insert(data)
        except pymongo.errors.DuplicateKeyError:
            id = None
        return id

    def insert_user(self, data):
        # Insert data into user collection
        # returns id
        try:
            id = self.uc.insert(data)
        except pymongo.errors.DuplicateKeyError:
            id = None
        return id

    def get_thread(self, ID):
        thread_rec = self.tc.find_one({"_id": ID})

        user_list  = []
        try:
            for post in thread_rec["posts"]:
                user_list.append([post["userID"]  + USEROFFSET, 
                                  self.uc.find_one({"_id": post["userID"] + USEROFFSET})])
        except KeyError:
            pass
        
        return {"thread":thread_rec, "users": user_list}
    
    def get_user(self, ID):
        return self.uc.find_one({"_id": ID})
    
    
    def fill_users(self, batch_size = 5000):
        buffer = []
        for user in self.XRO.get_users():
            user["_id"] = user["ID"] + USEROFFSET
            buffer.append(user)
            
            if len(buffer) == batch_size:
                if DEBUG: print "Writing user collection", batch_size
                self.insert_user(buffer)
                buffer = []

        self.insert_user(buffer)

    def fill_threads(self, batch_size = 5000):
        buffer = []
        for thread in self.XRO.get_complete_threads():
            thread["_id"] = thread["ID"]
            buffer.append(thread)
            
            if len(buffer) == batch_size:
                if DEBUG: print "Writing thread collection", batch_size
                self.insert_thread(buffer)
                buffer = []

        self.insert_thread(buffer)
    
    def create_indices(self):
        self.uc.create_index("ID")
        self.tc.create_index("ID")
        
    @TimeDec
    def import_XML(self, XRO):
        self.XRO = XRO
        self.fill_threads()
        self.fill_users()

    
    def TEST(self):
        global DEBUG
        DEBUG = 1
        
#        print "Creating test documents"
#        tid = self.insert_thread({"thread":"test", "ID": -1})
#        uid = self.insert_user({"user":"******", "ID": -1})
#        
#        print self.get_thread(-1)
#        print self.get_user(-2)
        
        self.reset()

        print "Populating MongoDB"
        self.fill_users(10000)
        self.fill_threads()

        print self.get_thread(3)
        print self.get_user(3)

        
        print "Creating Indices"
        self.create_indices()