def runTest():
    """Run the fixed test-set configuration and write its predictions.

    Loads ``train.csv`` through ``readHelper.readTrainMemory`` and the
    queries in ``test.csv`` through ``readHelper.readQueryMemory``, calls
    ``memory.pccMemoryCF`` with k=100, the ``'dotp'`` similarity and the
    ``'mean'`` combination, then passes the result and the query tuples to
    ``writeHelper.writePredMemory``.
    """
    ratings = readHelper.readTrainMemory('train.csv')
    queries, query_tuples = readHelper.readQueryMemory('test.csv')
    predictions = memory.pccMemoryCF(ratings, queries, 100, 'dotp', 'mean')
    # NOTE(review): the output name is spelled 'prediciton.txt' (sic); it is
    # left untouched because other tooling may look for that exact file.
    writeHelper.writePredMemory('prediciton.txt', predictions, query_tuples)
def main(argv=None):
    """Command-line entry point for the collaborative-filtering runs.

    With ``-t`` the fixed configuration in ``runTest`` is executed.
    Otherwise ``-m``, ``-k``, ``-s`` and ``-w`` select a run that reads
    ``train.csv`` and ``dev.csv`` and writes its predictions to
    ``<m>-<k>-<s>-<w>[-pcc][-bi].txt``; the elapsed time is printed at the
    end of such a run.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            lets argparse read ``sys.argv[1:]``, as the original did.

    Raises:
        SystemExit: raised by argparse for invalid options, or when a run
            without ``-t`` does not supply all of ``-m``, ``-k``, ``-s``
            and ``-w``.
    """
    start_time = time.time()
    parser = argparse.ArgumentParser(description="collaborative filtering")
    parser.add_argument("-t", help="if running test", action='store_true', default=False)
    parser.add_argument("-m", help="memory-based or model collaborative filtering", choices=['memory', 'model'])
    parser.add_argument("-k", help="number of k nearest neighborhood", type=int, choices=[10, 100, 500])
    parser.add_argument("-s", help="similarity metric used for knn ", choices=['dotp', 'cosine'])
    parser.add_argument("-w", help="approach for combining prediction given knn", choices=['mean', 'weight'])
    parser.add_argument("-p", help="if standardization used", action='store_true', default=False)
    parser.add_argument("-b", help="if bipartite clustering userd", action="store_true", default=False)
    args = parser.parse_args(argv)
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(args)

    if args.t:
        runTest()
        return

    # These options have no defaults; without this check a missing one
    # surfaces later as a TypeError from str.join instead of a usage error.
    required = (("-m", args.m), ("-k", args.k), ("-s", args.s), ("-w", args.w))
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error("missing option(s) %s (required unless -t is given)"
                     % ", ".join(missing))

    output = "-".join([args.m, str(args.k), args.s, args.w])
    if args.p:
        output += '-pcc'
    if args.b:
        output += '-bi'
    output += '.txt'
    train = 'train.csv'
    dev = 'dev.csv'

    if args.b:
        # -b takes precedence over the plain memory/model paths; -p only
        # affects the output name here, exactly as in the original flow.
        M = readHelper.readTrainModel(train)
        tuples = readHelper.readQueryModel(dev)
        if args.m == 'model':
            pred = BiCF.bi_item(M, tuples, args.k, args.s, args.w)
        else:
            pred = BiCF.bi_user(M, tuples, args.k, args.s, args.w)
        writeHelper.writePredModel(output, pred)
    elif args.m == 'memory':
        M = readHelper.readTrainMemory(train)
        query, tuples = readHelper.readQueryMemory(dev)
        if args.p:
            pred = memory.pccMemoryCF(M, query, args.k, args.s, args.w)
        else:
            pred = memory.memoryCF(M, query, args.k, args.s, args.w)
        writeHelper.writePredMemory(output, pred, tuples)
    else:
        # argparse's choices guarantee args.m == 'model' at this point.
        M = readHelper.readTrainModel(train)
        tuples = readHelper.readQueryModel(dev)
        if args.p:
            pred = model.pccModelCF(M, tuples, args.k, args.s, args.w)
        else:
            pred = model.modelCF(M, tuples, args.k, args.s, args.w)
        writeHelper.writePredModel(output, pred)

    print('time: %s seconds' % (time.time() - start_time))
# Example #3
# 0
def runTest():
    """Predict for the queries in ``test.csv`` with a fixed configuration.

    The training data comes from ``readHelper.readTrainMemory('train.csv')``
    and the queries from ``readHelper.readQueryMemory('test.csv')``.
    ``memory.pccMemoryCF`` is called with k=100, ``'dotp'`` and ``'mean'``,
    and ``writeHelper.writePredMemory`` receives the output file name, the
    predictions and the query tuples.
    """
    train_path, test_path = 'train.csv', 'test.csv'
    # NOTE(review): 'prediciton.txt' is misspelled in the original; the name
    # is preserved so anything that reads this file keeps working.
    out_path = 'prediciton.txt'
    neighbours, similarity, combiner = 100, 'dotp', 'mean'

    train_data = readHelper.readTrainMemory(train_path)
    parsed_queries = readHelper.readQueryMemory(test_path)
    query, tuples = parsed_queries
    result = memory.pccMemoryCF(train_data, query, neighbours, similarity,
                                combiner)
    writeHelper.writePredMemory(out_path, result, tuples)
    return None
# Example #4
# 0
def _build_parser():
    """Return the argparse parser describing the command-line options."""
    parser = argparse.ArgumentParser(description="collaborative filtering")
    parser.add_argument("-t",
                        help="if running test",
                        action='store_true',
                        default=False)
    parser.add_argument("-m",
                        help="memory-based or model collaborative filtering",
                        choices=['memory', 'model'])
    parser.add_argument("-k",
                        help="number of k nearest neighborhood",
                        type=int,
                        choices=[10, 100, 500])
    parser.add_argument("-s",
                        help="similarity metric used for knn ",
                        choices=['dotp', 'cosine'])
    parser.add_argument("-w",
                        help="approach for combining prediction given knn",
                        choices=['mean', 'weight'])
    parser.add_argument("-p",
                        help="if standardization used",
                        action='store_true',
                        default=False)
    parser.add_argument("-b",
                        help="if bipartite clustering userd",
                        action="store_true",
                        default=False)
    return parser


def _missing_options(args):
    """Return the flags among -m/-k/-s/-w that were not given a value."""
    candidates = (("-m", args.m), ("-k", args.k), ("-s", args.s),
                  ("-w", args.w))
    return [flag for flag, value in candidates if value is None]


def main(argv=None):
    """Parse the command line and run the selected collaborative filter.

    ``-t`` delegates to ``runTest``. Any other invocation needs ``-m``,
    ``-k``, ``-s`` and ``-w``; it reads ``train.csv`` and ``dev.csv``,
    writes predictions to ``<m>-<k>-<s>-<w>[-pcc][-bi].txt`` and then
    prints the elapsed time.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse fall back to ``sys.argv[1:]``.

    Raises:
        SystemExit: raised by argparse on invalid options, or when
            ``-t`` is absent and one of ``-m``, ``-k``, ``-s``, ``-w`` is
            missing.
    """
    start_time = time.time()
    parser = _build_parser()
    args = parser.parse_args(argv)
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(args)

    if args.t:
        runTest()
        return

    # Without this check a missing option only fails later, as a TypeError
    # raised while joining None into the output file name.
    absent = _missing_options(args)
    if absent:
        parser.error("missing option(s) %s (required unless -t is given)"
                     % ", ".join(absent))

    suffix = ('-pcc' if args.p else '') + ('-bi' if args.b else '')
    output = "-".join([args.m, str(args.k), args.s, args.w]) + suffix + '.txt'
    train = 'train.csv'
    dev = 'dev.csv'

    if args.b:
        # With -b only the BiCF routines run; -p changes just the file name.
        M = readHelper.readTrainModel(train)
        tuples = readHelper.readQueryModel(dev)
        if args.m == 'model':
            pred = BiCF.bi_item(M, tuples, args.k, args.s, args.w)
        else:
            pred = BiCF.bi_user(M, tuples, args.k, args.s, args.w)
        writeHelper.writePredModel(output, pred)
    elif args.m == 'memory':
        M = readHelper.readTrainMemory(train)
        query, tuples = readHelper.readQueryMemory(dev)
        if args.p:
            pred = memory.pccMemoryCF(M, query, args.k, args.s, args.w)
        else:
            pred = memory.memoryCF(M, query, args.k, args.s, args.w)
        writeHelper.writePredMemory(output, pred, tuples)
    else:
        # The parser's choices leave 'model' as the only remaining value.
        M = readHelper.readTrainModel(train)
        tuples = readHelper.readQueryModel(dev)
        if args.p:
            pred = model.pccModelCF(M, tuples, args.k, args.s, args.w)
        else:
            pred = model.modelCF(M, tuples, args.k, args.s, args.w)
        writeHelper.writePredModel(output, pred)

    print('time: %s seconds' % (time.time() - start_time))