def runTest():
    """Run the fixed test-set configuration and write its predictions.

    Reads the training data from 'train.csv' and the queries from
    'test.csv' through ``readHelper``, calls ``memory.pccMemoryCF`` with
    k=100, the 'dotp' similarity and the 'mean' combination, and hands the
    result to ``writeHelper.writePredMemory``.
    """
    train_path = 'train.csv'
    test_path = 'test.csv'
    # The spelling of this file name is kept as-is: it is a runtime path.
    out_path = 'prediciton.txt'

    neighbours = 100
    similarity = 'dotp'
    combine = 'mean'

    ratings = readHelper.readTrainMemory(train_path)
    queries, query_tuples = readHelper.readQueryMemory(test_path)

    predictions = memory.pccMemoryCF(
        ratings, queries, neighbours, similarity, combine)
    writeHelper.writePredMemory(out_path, predictions, query_tuples)
def main():
    """Command-line entry point for the collaborative-filtering runs.

    Parses the command line, prints the parsed namespace, and then either
    delegates to ``runTest()`` (when ``-t`` is given) or runs one
    configuration against 'train.csv' / 'dev.csv' and writes the predictions
    to a file whose name is built from the chosen options.

    Options:
        -t  run the fixed test configuration instead of a dev run
        -m  'memory' or 'model'
        -k  number of nearest neighbours (10, 100 or 500)
        -s  similarity metric ('dotp' or 'cosine')
        -w  how neighbour ratings are combined ('mean' or 'weight')
        -p  use the pcc* variant of the selected routine
        -b  use the BiCF routines (bi_item for 'model', bi_user otherwise)

    Raises:
        SystemExit: via ``parser.error`` when ``-t`` is absent and any of
            ``-m``, ``-s`` or ``-w`` is missing (previously this surfaced as
            a TypeError while building the output file name).

    NOTE(review): this file defines main() a second time further down; that
    later definition is the one bound to the name once the module is loaded.
    """
    start_time = time.time()

    parser = argparse.ArgumentParser(description="collaborative filtering")
    parser.add_argument("-t", help="if running test",
                        action='store_true', default=False)
    parser.add_argument("-m",
                        help="memory-based or model collaborative filtering",
                        choices=['memory', 'model'])
    parser.add_argument("-k", help="number of k nearest neighborhood",
                        type=int, choices=[10, 100, 500])
    parser.add_argument("-s", help="similarity metric used for knn ",
                        choices=['dotp', 'cosine'])
    parser.add_argument("-w",
                        help="approach for combining prediction given knn",
                        choices=['mean', 'weight'])
    parser.add_argument("-p", help="if standardization used",
                        action='store_true', default=False)
    parser.add_argument("-b", help="if bipartite clustering userd",
                        action="store_true", default=False)
    args = parser.parse_args()
    # Single-argument parenthesised form prints the same on Python 2 and
    # also parses on Python 3.
    print(args)

    if args.t:
        runTest()
        return

    # -m, -s and -w are joined as strings below, so a missing one would raise
    # a TypeError; report it as a usage error instead.
    # NOTE(review): -k goes through str() and therefore does not fail here;
    # whether the downstream routines accept k=None is not visible from this
    # function, so it is deliberately not made mandatory.
    missing = [flag
               for flag, value in (("-m", args.m),
                                   ("-s", args.s),
                                   ("-w", args.w))
               if value is None]
    if missing:
        parser.error("the following arguments are required unless -t is "
                     "given: %s" % ", ".join(missing))

    output = "-".join([args.m, str(args.k), args.s, args.w])
    if args.p:
        output += '-pcc'
    if args.b:
        output += '-bi'
    output += '.txt'

    train = 'train.csv'
    dev = 'dev.csv'

    if args.b:
        # Bipartite runs always use the model-style readers and writer.
        M = readHelper.readTrainModel(train)
        tuples = readHelper.readQueryModel(dev)
        if args.m == 'model':
            pred = BiCF.bi_item(M, tuples, args.k, args.s, args.w)
        else:
            pred = BiCF.bi_user(M, tuples, args.k, args.s, args.w)
        writeHelper.writePredModel(output, pred)
    elif args.m == 'memory':
        M = readHelper.readTrainMemory(train)
        query, tuples = readHelper.readQueryMemory(dev)
        if args.p:
            pred = memory.pccMemoryCF(M, query, args.k, args.s, args.w)
        else:
            pred = memory.memoryCF(M, query, args.k, args.s, args.w)
        writeHelper.writePredMemory(output, pred, tuples)
    else:
        # args.m == 'model' (argparse restricts -m to the two choices).
        M = readHelper.readTrainModel(train)
        tuples = readHelper.readQueryModel(dev)
        if args.p:
            pred = model.pccModelCF(M, tuples, args.k, args.s, args.w)
        else:
            pred = model.modelCF(M, tuples, args.k, args.s, args.w)
        writeHelper.writePredModel(output, pred)

    print('time: %s seconds' % (time.time() - start_time))
def main():
    """Command-line entry point for the collaborative-filtering runs.

    Parses the command line, prints the parsed namespace, and then either
    delegates to ``runTest()`` (when ``-t`` is given) or runs one
    configuration against 'train.csv' / 'dev.csv' and writes the predictions
    to a file whose name is built from the chosen options.

    Options:
        -t  run the fixed test configuration instead of a dev run
        -m  'memory' or 'model'
        -k  number of nearest neighbours (10, 100 or 500)
        -s  similarity metric ('dotp' or 'cosine')
        -w  how neighbour ratings are combined ('mean' or 'weight')
        -p  use the pcc* variant of the selected routine
        -b  use the BiCF routines (bi_item for 'model', bi_user otherwise)

    Raises:
        SystemExit: via ``parser.error`` when ``-t`` is absent and any of
            ``-m``, ``-s`` or ``-w`` is missing (previously this surfaced as
            a TypeError while building the output file name).

    NOTE(review): an earlier definition of main() with the same logic appears
    above in this file; this later one is the definition that takes effect.
    """
    start_time = time.time()

    parser = argparse.ArgumentParser(description="collaborative filtering")
    parser.add_argument("-t", help="if running test",
                        action='store_true', default=False)
    parser.add_argument("-m",
                        help="memory-based or model collaborative filtering",
                        choices=['memory', 'model'])
    parser.add_argument("-k", help="number of k nearest neighborhood",
                        type=int, choices=[10, 100, 500])
    parser.add_argument("-s", help="similarity metric used for knn ",
                        choices=['dotp', 'cosine'])
    parser.add_argument("-w",
                        help="approach for combining prediction given knn",
                        choices=['mean', 'weight'])
    parser.add_argument("-p", help="if standardization used",
                        action='store_true', default=False)
    parser.add_argument("-b", help="if bipartite clustering userd",
                        action="store_true", default=False)
    args = parser.parse_args()
    # Single-argument parenthesised form prints the same on Python 2 and
    # also parses on Python 3.
    print(args)

    if args.t:
        runTest()
        return

    # The output name joins -m, -s and -w as strings; if one is absent the
    # join would raise TypeError, so turn that into a usage error.
    # NOTE(review): -k is wrapped in str() and so cannot fail at this point;
    # it is left optional because this function does not show whether the
    # downstream routines tolerate k=None.
    required = (("-m", args.m), ("-s", args.s), ("-w", args.w))
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error("the following arguments are required unless -t is "
                     "given: %s" % ", ".join(missing))

    output = "-".join([args.m, str(args.k), args.s, args.w])
    if args.p:
        output += '-pcc'
    if args.b:
        output += '-bi'
    output += '.txt'

    train = 'train.csv'
    dev = 'dev.csv'

    if args.b:
        # Bipartite runs always use the model-style readers and writer.
        M = readHelper.readTrainModel(train)
        tuples = readHelper.readQueryModel(dev)
        if args.m == 'model':
            pred = BiCF.bi_item(M, tuples, args.k, args.s, args.w)
        else:
            pred = BiCF.bi_user(M, tuples, args.k, args.s, args.w)
        writeHelper.writePredModel(output, pred)
    elif args.m == 'memory':
        M = readHelper.readTrainMemory(train)
        query, tuples = readHelper.readQueryMemory(dev)
        if args.p:
            pred = memory.pccMemoryCF(M, query, args.k, args.s, args.w)
        else:
            pred = memory.memoryCF(M, query, args.k, args.s, args.w)
        writeHelper.writePredMemory(output, pred, tuples)
    else:
        # args.m == 'model' (argparse restricts -m to the two choices).
        M = readHelper.readTrainModel(train)
        tuples = readHelper.readQueryModel(dev)
        if args.p:
            pred = model.pccModelCF(M, tuples, args.k, args.s, args.w)
        else:
            pred = model.modelCF(M, tuples, args.k, args.s, args.w)
        writeHelper.writePredModel(output, pred)

    print('time: %s seconds' % (time.time() - start_time))