예제 #1
0
def svm_poim(argv):
    """A top level script to parse input parameters and plot poims"""

    assert(argv[1]=='poim')
    if len(argv)<7:sys.stderr.write("usage: %s poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles  poim.png [dna|protein] non(nucleotide|amino)converter\n" % argv[0]);sys.exit(-1)

    # parse input parameters
    C = float(argv[2])
    poimdegree = int(argv[3])
    (kernelname,kparam,argv_rest) = parse.parse_kernel_param(argv[4:], False)
    (examples,labels,argv_rest) = parse.parse_input_file_train(kernelname, argv_rest)
    
    if len(argv_rest)<1:sys.stderr.write("poim.png [dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
    if len(argv_rest)<2:sys.stderr.write("[dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
    if len(argv_rest)<3:
        if argv_rest[-1] == 'dna':
            sys.stderr.write("non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n")
            sys.exit(-1)
        elif argv_rest[-1] == 'protein':
            sys.stderr.write("non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n")
            sys.exit(-1)
        else:
            sys.stderr.write("Here expect FASTA sequence type as [dna|protein] instead of -"+ argv_rest[-1] +"- Cannot continue.\n")
            sys.exit(-1)
    if len(argv_rest)>3:sys.stderr.write("Too many arguments\n");sys.exit(-1)
    poimfilename = argv_rest[0]
    seq_source = argv_rest[1]
    nuc_con = argv_rest[2]

    utils.check_params(kparam, C, len(examples[0]))

    # train svm and compute POIMs
    (svm, kernel, feats_train, preproc) = train(examples,labels,C,kernelname,kparam,seq_source,nuc_con)
    print "done with training "
    (poim, max_poim, diff_poim, poim_totalmass) = compute_poims(svm, kernel, poimdegree, len(examples[0]))

    # plot poims
    plots.plot_poims(poimfilename, poim, max_poim, diff_poim, poim_totalmass, poimdegree, len(examples[0]))
예제 #2
0
파일: experiment.py 프로젝트: mcopik/shogun
def svm_poim(argv):
    """A top level script to parse input parameters and plot poims"""

    assert(argv[1]=='poim')
    if len(argv)<7:sys.stderr.write("usage: %s poim C poimdegree wd [kernelparameters] [arff|fasta] inputfiles  poim.png [dna|protein] non(nucleotide|amino)converter\n" % argv[0]);sys.exit(-1)

    # parse input parameters
    C = float(argv[2])
    poimdegree = int(argv[3])
    (kernelname,kparam,argv_rest) = parse.parse_kernel_param(argv[4:], False)
    (examples,labels,argv_rest) = parse.parse_input_file_train(kernelname, argv_rest)
    
    if len(argv_rest)<1:sys.stderr.write("poim.png [dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
    if len(argv_rest)<2:sys.stderr.write("[dna|protein] non(nucleotide|amino)converter are missing\n");sys.exit(-1)
    if len(argv_rest)<3:
        if argv_rest[-1] == 'dna':
            sys.stderr.write("non-nucleotide converter like [A|T|C|G|R|Y|N] is missing. Cannot continue.\n")
            sys.exit(-1)
        elif argv_rest[-1] == 'protein':
            sys.stderr.write("non-amino acid converter like [G|P|A|V|L|I|M|C|F|Y|W|H|K|R|Q|N|E|D|S|T|random] is missing. Cannot continue.\n")
            sys.exit(-1)
        else:
            sys.stderr.write("Here expect FASTA sequence type as [dna|protein] instead of -"+ argv_rest[-1] +"- Cannot continue.\n")
            sys.exit(-1)
    if len(argv_rest)>3:sys.stderr.write("Too many arguments\n");sys.exit(-1)
    poimfilename = argv_rest[0]
    seq_source = argv_rest[1]
    nuc_con = argv_rest[2]

    utils.check_params(kparam, C, len(examples[0]))

    # train svm and compute POIMs
    (svm, kernel, feats_train, preproc) = train(examples,labels,C,kernelname,kparam,seq_source,nuc_con)
    (poim, max_poim, diff_poim, poim_totalmass) = compute_poims(svm, kernel, poimdegree, len(examples[0]))

    # plot poims
    plots.plot_poims(poimfilename, poim, max_poim, diff_poim, poim_totalmass, poimdegree, len(examples[0]))