コード例 #1
0
ファイル: import_nfp_data.py プロジェクト: tillberg/RapidRec
    with open(filename) as f:
        for line in f:
            yield line.strip()


# Module-level CacheDB handle; the import loop under the __main__ guard
# writes every record through it.
db = cachedb.CacheDB()

if __name__ == "__main__":
    datasets = {"mini": "testmini", "all": "testbig"}
    datasetlengths = {"all": 99072112, "mini": 4692576}
    for dataset in ["all", "mini"]:
        length = datasetlengths[dataset]
        records = load_records("nfp/%s.txt" % dataset)
        sitename = datasets[dataset]
        print
        print "Inserting %i records into %s" % (length, sitename)
        i = 0
        for record in records:
            if i % 1000 == 0:
                print_status("Inserting record %i" % i, i, length)
            i += 1
            (user, movie, rating, date) = record.split(",")
            datestamp = "%s%s" % (date.replace("-", ""), "120000000")
            db.insert_without_timestamp(sitename, ("user", user), ("movie", movie), ("rating", rating, datestamp))
        print_status()
        print "Finished importing %s" % dataset
        time.sleep(5)

# NOTE: this loop is at module level, outside the __main__ guard, so it runs
# on import as well as when the file is executed as a script. It never exits:
# it just sleeps one second at a time. Presumably it keeps the process alive
# after the import completes -- TODO confirm.
while True:
    time.sleep(1)
コード例 #2
0
ファイル: test_nfp_data.py プロジェクト: tillberg/RapidRec
#print type(l)
#print l

if __name__ == '__main__':
    datasets = {'mini': 'testmini', 'all': 'testbig'} 
    datasetlengths = {'all': 99072112, 'mini': 4692576}
    for dataset in ['mini', 'all']:
        length = datasetlengths[dataset]
        records = load_records('nfp/%s.txt' % dataset)
        sitename = datasets[dataset]
        print
        print 'Testing %i records into %s' % (length, sitename)
        i = 0
        users = {}
        for record in records:
            if (i % 1000 == 0): print_status('Testing record %i' % i, i, length)
            i += 1
            (user, movie, rating, date) = record.split(',')
            if user not in users:
                users[user] = db.get_key(sitename, ('user', user))
            #record = db.get_subkey(sitename, ('user', user), ('movie', movie))
            record = users[user][('movie', movie)]
            datestamp = '%s%s' % (date.replace('-', ''), '120000000')
            expected = [('rating', rating, datestamp)]
            test = (record == expected)
            if not test:
                print 'fail: %s != %s' % (record, expected)
            #ns.insert(('user', user), ('movie', movie), ('rating', rating, date), add_timestamp=False)
        print_status()
        print 'Finished testing %s' % dataset
        time.sleep(5)