help='Save predictions in filename', required=True) args = parser.parse_args() comm = MPI.COMM_WORLD rank = comm.Get_rank() NumProcessors = comm.Get_size() # load the model (as a dictionary) f = open(args.modelfile, 'rb') model = cPickle.load(f) f.close() # Instantiate the model with the loaded dictionary model = KernelMachine(**model) # Load the data objective = 0 if rank == 0: print "Parsing the data..." data = skylark.io.libsvm(args.testfile).read() X = data[0].todense() # TODO we want to keep it sparse for now. Y = data[1] # If missing features, then augment the data if X.shape[1] < model.RFTs[0].getindim(): fulldim = model.RFTs[0].getindim() n = X.shape[0] partialdim = X.shape[1] X = numpy.concatenate((X, numpy.zeros((n, fulldim - partialdim))),
# Command-line options for the prediction run. The model file is read (not
# written) by this script, so the help text says "Load".
parser.add_argument("--modelfile", type=str,
                    help='Load model from filename', required=True)
parser.add_argument("--outputfile", type=str,
                    help='Save predictions in filename', required=True)
args = parser.parse_args()

# MPI bookkeeping: this process's rank and the communicator size.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
NumProcessors = comm.Get_size()

# Load the model (as a dictionary).
# SECURITY: unpickling executes arbitrary code embedded in the file, so only
# point --modelfile at model files that come from a trusted source.
# The `with` block guarantees the handle is closed even if loading fails.
with open(args.modelfile, 'rb') as f:
    model = cPickle.load(f)

# Instantiate the model with the loaded dictionary
model = KernelMachine(**model)

# Load the data
objective = 0
# NOTE(review): the original indentation of this snippet was lost; the
# parse/augment steps below are assumed to belong to the rank-0 branch
# (root reads, then distributes) -- confirm against the full script.
if rank == 0:
    print("Parsing the data...")
    data = skylark.io.libsvm(args.testfile).read()
    X = data[0].todense()  # TODO we want to keep it sparse for now.
    Y = data[1]

    # If missing features, then augment the data: pad X on the right with
    # zero columns until it has as many columns as the model's first
    # transform reports via getindim().
    fulldim = model.RFTs[0].getindim()
    if X.shape[1] < fulldim:
        n = X.shape[0]
        partialdim = X.shape[1]
        X = numpy.concatenate((X, numpy.zeros((n, fulldim - partialdim))),
                              axis=1)
# Copy the contents of Y_cc into Y (both are defined earlier in the script).
elem.Copy(Y_cc, Y)

#pr = cProfile.Profile()
# Only the root rank reports the elapsed wall-clock time since `starttime`.
if rank == 0:
    print("Reading and distributing the data took %f seconds"
          % (MPI.Wtime() - starttime))

# Create kernel
# SECURITY: args.kernelparams is evaluated as a Python expression and its
# result is unpacked as keyword arguments; eval() will run any code placed
# in that command-line string, so it must only come from a trusted user.
# (presumably shape_X[1] is the number of input features -- confirm)
kernel = skylark.ml.kernels.kernel(args.kernel, shape_X[1],
                                   **eval(args.kernelparams))

# train the model
model = KernelMachine(lossfunction=args.lossfunction,
                      regularizer=args.regularizer,
                      regparam=args.regparam,
                      randomfeatures=args.randomfeatures,
                      kernel=kernel,
                      numfeaturepartitions=args.numfeaturepartitions,
                      TOL=args.TOL,
                      MAXITER=args.MAXITER,
                      zerobased=args.zerobased,
                      subtype=args.subtype)

# Disabled cProfile scaffolding around the training call; uncomment the
# `pr` lines together to profile model.train.
#pr.enable()
model.train((X, Y))
#pr.disable()
#s = StringIO.StringIO()
#sortby = 'cumulative'
#ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
#ps.print_stats()
#pr = cProfile.Profile()
# Only the root rank reports the elapsed wall-clock time since `starttime`.
if rank == 0:
    print("Reading and distributing the data took %f seconds"
          % (MPI.Wtime() - starttime))

# Create kernel
# SECURITY: args.kernelparams is evaluated as a Python expression and its
# result is unpacked as keyword arguments; eval() will run any code placed
# in that command-line string, so it must only come from a trusted user.
# (presumably shape_X[1] is the number of input features -- confirm)
kernel = skylark.ml.kernels.kernel(args.kernel, shape_X[1],
                                   **eval(args.kernelparams))

# train the model
model = KernelMachine(lossfunction=args.lossfunction,
                      regularizer=args.regularizer,
                      regparam=args.regparam,
                      randomfeatures=args.randomfeatures,
                      kernel=kernel,
                      numfeaturepartitions=args.numfeaturepartitions,
                      TOL=args.TOL,
                      MAXITER=args.MAXITER,
                      zerobased=args.zerobased,
                      subtype=args.subtype)

# Disabled cProfile scaffolding around the training call; uncomment the
# `pr` lines together to profile model.train and dump the stats.
#pr.enable()
model.train((X, Y))
#pr.disable()
#s = StringIO.StringIO()
#sortby = 'cumulative'
#ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
#ps.print_stats()
#print s.getvalue()