with open(filename) as f: for line in f: yield line.strip() db = cachedb.CacheDB() if __name__ == "__main__": datasets = {"mini": "testmini", "all": "testbig"} datasetlengths = {"all": 99072112, "mini": 4692576} for dataset in ["all", "mini"]: length = datasetlengths[dataset] records = load_records("nfp/%s.txt" % dataset) sitename = datasets[dataset] print print "Inserting %i records into %s" % (length, sitename) i = 0 for record in records: if i % 1000 == 0: print_status("Inserting record %i" % i, i, length) i += 1 (user, movie, rating, date) = record.split(",") datestamp = "%s%s" % (date.replace("-", ""), "120000000") db.insert_without_timestamp(sitename, ("user", user), ("movie", movie), ("rating", rating, datestamp)) print_status() print "Finished importing %s" % dataset time.sleep(5) while True: time.sleep(1)
#print type(l)
#print l

if __name__ == '__main__':
    # Verification script: for each dataset, re-read "nfp/<dataset>.txt"
    # through load_records() and check that the rating stored in the DB for
    # every (user, movie) pair matches the one in the file.

    # Dataset name -> site name passed to the DB.
    datasets = {'mini': 'testmini', 'all': 'testbig'}
    # Hard-coded record totals; only used for the progress display.
    datasetlengths = {'all': 99072112, 'mini': 4692576}

    for dataset in ['mini', 'all']:
        length = datasetlengths[dataset]
        records = load_records('nfp/%s.txt' % dataset)
        sitename = datasets[dataset]

        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('')
        print('Testing %i records into %s' % (length, sitename))

        # Per-dataset cache: user id -> whatever db.get_key() returned for
        # that user, so each user is fetched from the DB only once.
        users = {}

        for i, record in enumerate(records):
            # Refresh the progress display once every 1000 records.
            if i % 1000 == 0:
                print_status('Testing record %i' % i, i, length)

            # A blank line (e.g. at the end of the file) is yielded as '' and
            # would break the 4-way unpack below with a ValueError.
            if not record:
                continue

            user, movie, rating, date = record.split(',')
            if user not in users:
                users[user] = db.get_key(sitename, ('user', user))

            # Per-record alternative that was left disabled:
            #record = db.get_subkey(sitename, ('user', user), ('movie', movie))
            # NOTE(review): a movie missing from the fetched user entry is not
            # reported as a 'fail' line; the lookup's own error (if any)
            # propagates -- confirm what db.get_key() returns.
            stored = users[user][('movie', movie)]

            # "YYYY-MM-DD" -> "YYYYMMDD" followed by a fixed suffix.
            datestamp = date.replace('-', '') + '120000000'
            expected = [('rating', rating, datestamp)]

            # Kept as `not (a == b)`: the stored value's type is not visible
            # here, and Python 2 does not derive != from ==.
            if not (stored == expected):
                print('fail: %s != %s' % (stored, expected))
            #ns.insert(('user', user), ('movie', movie), ('rating', rating, date), add_timestamp=False)

        # Called without arguments once the dataset is done (presumably ends
        # the progress line -- see print_status).
        print_status()
        print('Finished testing %s' % dataset)
        # NOTE(review): the original indentation was lost; this pause is
        # assumed to run once per dataset -- confirm.
        time.sleep(5)