Ejemplo n.º 1
0
def parser(def_model='l2svm', def_loss='f1score', def_verbose=True):
    """Parse command-line options and prepare the result directory.

    NOTE: ``def_model``, ``def_loss`` and ``def_verbose`` are currently
    unused; the option defaults are the literals passed to ``add_option``
    below (model ``'l2 square'``, scorer ``'hamming'``, verbose unless
    ``-q/--quiet`` is given).

    Options read from ``sys.argv``:
        -m/--model   model name (free-form string, passed to ``clean_name``)
        -i/--indir   overrides the default cross-validation input directory
        -o/--outdir  overrides the default result directory
        -l/--lscore  ``hamming`` or ``rankloss``
        -t/--toy     ``1`` selects the "toy" default directories, ``0`` the
                     regular ones
        -q/--quiet   turns verbosity off

    Returns:
        tuple: ``(saveinfo, runinfo, verbose)`` where ``saveinfo`` is
        ``{'resultdir': ...}``, ``runinfo`` is
        ``{'model': ..., 'cv_dir': ..., 'scorer': ...}`` and ``verbose`` is a
        bool.

    Raises:
        OSError: if the result directory does not exist and cannot be
            created.
    """
    usage = "usage: %prog [options] arg"
    # Named opt_parser so the local does not shadow this function's own name.
    opt_parser = OptionParser(usage)
    opt_parser.add_option("-m", "--model", dest="model", default='l2 square',
                          type='string',
                          #choices=known_models,
                          help="model selection from {%s}" % ', '.join(known_models))
    opt_parser.add_option("-i", "--indir", dest="indir",
                          help="input directory", metavar="INDIR")
    opt_parser.add_option("-o", "--outdir", dest="outdir",
                          help="output directory", metavar="OUTDIR")
    # The help text now states the real default ('hamming').
    opt_parser.add_option("-l", "--lscore", dest="scorer", default='hamming',
                          type='choice', choices=['hamming', 'rankloss'],
                          help="score type, hamming or rankloss (def=hamming)")
    #opt_parser.add_option("-s", "--shuffle", dest="shuffle", default=False,
    #                      action="store_true", help="shuffle labels (for random baseline)")
    opt_parser.add_option("-t", "--toy", default="0", type="choice",
                          choices=["0", "1"], dest="istoy",
                          help='generate toy data instead of real data (-t 1) or no (-t 0)')
    opt_parser.add_option("-q", "--quiet", default=False,
                          action="store_true", dest="quiet")

    (options, args) = opt_parser.parse_args()

    # extract options
    model_all = options.model        # raw model name as typed by the user
    #shuffle = options.shuffle # shuffle yes/no
    verbose = not options.quiet
    scorer = options.scorer
    istoy = int(options.istoy)       # "0"/"1" -> 0/1

    # Default input directory depends on whether toy data is requested.
    if istoy:
        cv_dir = os.path.expanduser(
            os.path.join("~", "workdir", "multilabel", "toy", "results", "doublecv"))
    else:
        cv_dir = os.path.expanduser(
            os.path.join("~", "workdir", "multilabel", "results", "doublecv"))

    # The default output directory is derived from the *default* cv_dir,
    # before any -i override is applied.
    resultdir = os.path.join(cv_dir, "processed")

    #if shuffle:
    #    cv_dir = os.path.join(cv_dir, "shuffle")

    if options.indir is not None:
        cv_dir = options.indir
    if options.outdir is not None:
        resultdir = options.outdir

    # Make the result directory. An already-existing directory is fine; any
    # other failure (e.g. permissions) is reported instead of being hidden.
    try:
        os.makedirs(resultdir)
    except OSError:
        if not os.path.isdir(resultdir):
            raise

    # extract model name
    model = clean_name(model_all)

    saveinfo = {'resultdir': resultdir}
    runinfo = {'model': model, "cv_dir": cv_dir, "scorer": scorer}#, "shuffle":shuffle}

    return saveinfo, runinfo, verbose
Ejemplo n.º 2
0
def main():
    """Command-line entry point: parse options, load data, train the model.

    Options read from ``sys.argv``:
        -m/--model    model name (free-form string, passed to ``clean_name``)
        -i/--indir    overrides the default data directory
        -o/--outdir   overrides the default result directory
        -c/--cv       which CV set to use (int, default 0)
        -l/--lscore   ``hamming`` or ``rankloss``
        -p            number of hyperparameter values to test (float, default 10)
        -n/--n_jobs   number of parallel fit jobs (int, default 6)
        -s/--shuffle  shuffle labels; also appends ``shuffle`` to the default
                      result directory
        -t/--toy      ``1`` (default) selects the "toy" result directory,
                      ``0`` the regular one
        -q/--quiet    turns progress output off

    The data is obtained from ``extract_data`` and handed to
    ``train_multilabel`` together with the run/save settings.

    Raises:
        OSError: if the result directory does not exist and cannot be
            created.
    """
    usage = "usage: %prog [options] arg"
    opt_parser = OptionParser(usage)
    opt_parser.add_option("-m", "--model", dest="model", default='l2 svm',
                          type='string',
                          #choices=known_models,
                          help="model selection from {%s}" % ', '.join(known_models))
    opt_parser.add_option("-i", "--indir", dest="indir",
                          help="input directory", metavar="INDIR")
    opt_parser.add_option("-o", "--outdir", dest="outdir",
                          help="output directory", metavar="OUTDIR")
    opt_parser.add_option("-c", "--cv", dest="CV", type="int", default=0,
                          help="which CV set (typically 0...4)")
    # The help text now states the real default ('hamming').
    opt_parser.add_option("-l", "--lscore", dest="scorer", default='hamming',
                          type='choice', choices=['hamming', 'rankloss'],
                          help="score type, hamming or rankloss (def=hamming)")
    opt_parser.add_option("-p", dest="param", type="float", default=10,
                          help="number of model hyperparameter to test (def=10)")
    opt_parser.add_option("-n", "--n_jobs", dest="n_jobs", type="int", default=6,
                          help="number of processors used in parallel fit (def=6)")
    opt_parser.add_option("-s", "--shuffle", dest="shuffle", default=False,
                          action="store_true",
                          help="shuffle labels (for random baseline)")
    opt_parser.add_option("-t", "--toy", default="1", type="choice",
                          choices=["0", "1"], dest="istoy",
                          help='generate toy data instead of real data (-t 1) or no (-t 0)')
    opt_parser.add_option("-q", "--quiet", default=False,
                          action="store_true", dest="quiet")

    (options, args) = opt_parser.parse_args()

    # extract options
    model_all = options.model          # raw model name as typed by the user
    cv = options.CV                    # which cv set
    p = options.param                  # number of params
    shuffle = options.shuffle          # shuffle yes/no
    verbose = 0 if options.quiet else 1
    scorer = options.scorer
    istoy = int(options.istoy)         # "0"/"1" -> 0/1
    n_jobs = options.n_jobs            # number of parallel fit jobs

    datadir = os.path.expanduser(
        os.path.join("~", "workdir", "data", "openfMRI", "preprocessed"))

    if istoy:
        resultdir = os.path.expanduser(
            os.path.join("~", "workdir", "multilabel", "toy", "results", "wardcv"))
    else:
        resultdir = os.path.expanduser(
            os.path.join("~", "workdir", "multilabel", "results", "wardcv"))

    if shuffle:
        resultdir = os.path.join(resultdir, "shuffle")

    # Explicit -i / -o always win over the computed defaults.
    if options.indir is not None:
        datadir = options.indir
    if options.outdir is not None:
        resultdir = options.outdir

    # Make the result directory. An already-existing directory is fine; any
    # other failure (e.g. permissions) is reported instead of being hidden.
    try:
        os.makedirs(resultdir)
    except OSError:
        if not os.path.isdir(resultdir):
            raise

    # extract model name
    model = clean_name(model_all)

    # run it!
    ###################################################
    # extract_data
    # Single-argument print(...) calls behave identically on Python 2 and 3.
    if verbose:
        print("Extracting data...")
        T = time.time()  # only defined (and only read) when verbose

    Xt, Xe, Z, train, test, subtrain, subtest = extract_data(datadir, cv, verbose, istoy)

    if verbose:
        # Xt.shape supplies N and D, Z.shape[1] supplies K for the message.
        dims = Xt.shape + (Z.shape[1],)
        print("data dims: N=%d, D=%d, K=%d" % dims)
        print("extract time = %s" % (time.time() - T,))
        if shuffle:
            print("shuffling output labels...")
        print("Training model...\nmodel=%s, cv=%d, scorer=%s" % (model, cv, scorer))
        T = time.time()

    # run model (or multimodel).
    saveinfo = {'resultdir': resultdir, 'cv': cv}
    runinfo = {'model': model, "scorer": scorer, "p": p, "shuffle": shuffle,
               "n_jobs": n_jobs}
    data = (Xt, Xe, Z, train, test, subtrain, subtest)
    train_multilabel(data, runinfo, saveinfo, verbose)
    if verbose:
        print("Combined Train time = %s" % (time.time() - T,))