Beispiel #1
0
def main():
    """
    main for pytableaucreate.py

    Usage: pytableaucreate [-35knefuv] [-d|-b] [-t structprog] [-p domainprog]
                [-a domainid]
                [-s sse_num_list] [-c chainid] [-m min_sse_len]
                [-i identifier] [-o savefile] <PDBfile>


    -3 specifies to include 3_10 helices in the diagram. Default is only
       alpha helices.

    -5 specifies to include pi helices in the diagram. Default is only
       alpha helices.

    -k use the HH and KK codes for respectively antiparallel and parallel
       strands in the same sheet, rather than the O, P etc. codes.

    -n output a numeric omega matrix instead of tableau.

    -e output numeric tableau angles in degrees, in the original
       TableauCreator .angles file format, with number of entries on
       first line, SSE sequence description on second line (E/H), then
       (full) matrix with angles in degrees (rather than radians).
       For distance matrix, same format with distances between SSEs
       in Angstroms.

    -f output the matrix in 'FORTRAN style' lower triangle with
       header line suitable for input to TMATN.

    -d build SSE axis midpoint distance matrix rather than tableau.

    -b build both the tableau and distance matrix and output together,
       for use with tsrchd etc. for example. If -u is used to permute
       the matrices, they are permuted the same way so they are still
       consistent.

    -p specify the domain decomposition method.
       Valid values are 'none' (default), 'ddomain', 'cath:cdf_filename'.

    -a domainid : only output specified domain

    -t specifies the secondary structure assignment program to use.
       Currently supported is 'pdb' and 'dssp' and 'stride' or 'pmml'.
       Default 'pdb'.

    -s sse_num_list specifies a comma-separated
       list of SSE sequential ids to build the
       tableau for. SSE sequential id's start at 1 and go from N to C
       terminus. E.g. -s1,5,8 includes only the 1st, 5th and 8th SSEs.
       Numbers do not restart at chains (but do restart in each domain).
       These numbers are those assigned by 'ptgraph2 -b sequential' option.

       TODO: this currently does not make sense when multiple domains
       are being processed, this option applies to each domain.

    -c chainid : specify chain identifier; only build tableau for that chain

    -m min_sse_len : minimum number of residues in SSE for it to be included

    -i identifier : when using fortran format (-f), specify the identifier
       to use in the output rather than deriving it from the filename

    -o savefile : save tableau in packed format for use in other
       programs, such as tabsearchqpml.py
       WARNING: savefile is overwritten if it exists

       TODO: this currently does not make sense when multiple domains
       are being processed, this option only saves first domain.

    -u randomly permute the rows+cols (symmetric) of the tableau/distance
       matrix. Writes the permutation vector in form
       permutation = i,j,..,m
       e.g.
       permutation = 3,1,2,4
       as first line of output before identifier information and tableau

    -v specifies verbose mode: debugging output is written to stderr.
    """
    global verbose

    # Imported locally so this function does not depend on any change to
    # the module's import block.
    import subprocess
    import tempfile

    # Every call to usage() gets the same (basename) program name.
    # NOTE(review): usage() is assumed not to return (i.e. it exits);
    # the option handling below relies on that, as the original did.
    progname = os.path.basename(sys.argv[0])

    try:
        opts, args = getopt.getopt(sys.argv[1:], "35bdfknep:a:t:s:c:m:i:o:uv?")
    except getopt.GetoptError:
        usage(progname)

    valid_secstruct_programs = ["dssp", "stride", "pdb", "pmml"]
    # -p values are matched as regular expressions (e.g. 'cath:cdf_filename')
    valid_domain_programs = getdomains.valid_domain_programs + [r"none"]
    valid_domain_programs_re = [
        re.compile(re_str) for re_str in valid_domain_programs
    ]

    verbose = False  # global (python globals are only 'global' to module though)
    secstruct_program = "pdb"
    include_310_helices = False
    include_pi_helices = False
    domain_program = "none"
    sse_id_list = None
    use_numeric = False
    use_hk = False
    savefilename = None
    min_sse_len = None
    fortran_format = False
    build_distance_matrix = False
    chainid = None
    fident = None
    do_shuffle = False
    build_both = False  # both tableau and dist matrix
    use_old_format = False  # size + SSE chain + degrees omega matrix
    domainid = None

    for opt, arg in opts:
        if opt == "-3":  # include 3_10 helices
            include_310_helices = True
        elif opt == "-5":  # include pi helices
            include_pi_helices = True
        elif opt == "-d":  # build SSE midpoint distance matrix not tableau
            build_distance_matrix = True
        elif opt == "-b":  # build both tableau and distance matrix
            build_both = True
        elif opt == "-k":  # use HH and KK codes
            use_hk = True
        elif opt == "-n":  # output numeric matrix not tableau
            use_numeric = True
        elif opt == "-e":  # use TableauCreator .angles file format
            use_old_format = True
        elif opt == "-f":  # FORTRAN style format for TMATN
            fortran_format = True
        elif opt == "-p":  # domain parsing program
            domain_program = None
            for valid_domarg_re in valid_domain_programs_re:
                if valid_domarg_re.match(arg):
                    domain_program = arg
                    break
            if domain_program is None:
                sys.stderr.write("valid values for -p are: " +
                                 str(valid_domain_programs) + "\n")
                usage(progname)
        elif opt == "-a":  # only output tableau for specified domain id
            domainid = arg
        elif opt == "-t":
            if arg not in valid_secstruct_programs:
                sys.stderr.write("valid values for -t are: " +
                                 str(valid_secstruct_programs) + "\n")
                usage(progname)
            secstruct_program = arg
        elif opt == "-s":
            sse_id_list = []
            seen_sse_ids = set()  # just for checking all ids are unique
            for sse_id_str in arg.split(','):
                if sse_id_str.isdigit():
                    sse_id = int(sse_id_str)
                    if sse_id in seen_sse_ids:
                        sys.stderr.write("duplicate SSE sequential number " +
                                         sse_id_str + "\n")
                        usage(progname)
                    seen_sse_ids.add(sse_id)
                    sse_id_list.append(sse_id)
                else:
                    sys.stderr.write("not a valid SSE sequential number '" +
                                     sse_id_str + "'\n")
                    usage(progname)
            sse_id_list.sort()  # ensure SSEs are in order
        elif opt == "-c":  # chain identifier
            if len(arg) != 1:
                sys.stderr.write("invalid chain identifier for -c option\n")
                usage(progname)
            chainid = arg.upper()
        elif opt == "-m":  # min sse len
            try:
                min_sse_len = int(arg)
            except ValueError:
                # report a usage error instead of an uncaught traceback
                sys.stderr.write("invalid minimum SSE length for -m option\n")
                usage(progname)
        elif opt == "-i":  # identifier to use for fortran format
            fident = arg
        elif opt == "-o":  # save tableau in packed format
            savefilename = arg
        elif opt == "-u":  # randomly permute the tableau/matrix
            do_shuffle = True
        elif opt == "-v":  # verbose
            verbose = True  # this module only
            ptnode_set_verbose(True)  # ptnode module
            ptsecstruct.ptsecstruct_set_verbose(True)  # ptsecstruct module
            ptdomain_set_verbose(True)  # ptdomain module
        else:  # includes -?
            usage(progname)

    if use_numeric and use_hk:
        sys.stderr.write("-n (numeric) and -k (use HH and KK codes) are "
                         "mutually exlusive\n")
        usage(progname)

    if build_distance_matrix and build_both:
        sys.stderr.write("WARNING: both -d (build dist matrix) and -b "
                         "(build both) specified, ignoring -d\n")
        build_distance_matrix = False

    if savefilename and do_shuffle:
        sys.stderr.write('WARNING: saved tableau will not be shuffled\n')

    if build_distance_matrix:
        if use_numeric:
            use_numeric = False
            sys.stderr.write(
                "WARNING: -n (numeric) ignored for -d (distance matrix)\n")
        if use_hk:
            sys.stderr.write(
                "-k (use HH and KK) invalid for -d (distance matrix)\n")
            usage(progname)

    if (secstruct_program == "pmml"
            and (min_sse_len is None or min_sse_len < 3)):
        sys.stderr.write(
            "WARNING: PMML can give SSEs of length 1 or 2 causing axis fitting to fail, setting minimum length to 3 as if -m3 were specfified\n"
        )
        min_sse_len = 3

    if fident:
        if not fortran_format:
            sys.stderr.write("-i is only valid with -f\n")
            usage(progname)
        elif len(fident) > 8:
            sys.stderr.write("identifier must be 8 chars or less\n")
            usage(progname)

    if use_old_format and (build_both or use_hk or use_numeric
                           or fortran_format or do_shuffle or savefilename):
        sys.stderr.write("-e (use old .angles format) is not compatible "
                         "with -b -k or -n or -f or -u or -o\n")
        usage(progname)

    if len(args) != 1:
        usage(progname)

    pdb_filename = args[0]

    # check for compressed files. We only support gzip (.gz)
    # Note we are not using the zlib or GzipFile python modules
    # since we are calling to external programs which require the
    # file uncompressed themselves anyway so we'll just run gzip
    # to uncompress the file to a temporary directory.
    pdb_file_basename = os.path.basename(pdb_filename)
    (name, extension) = os.path.splitext(pdb_file_basename)

    TMPDIR = None  # set only if a temporary directory was created
    try:
        if extension == '.gz':
            # mkdtemp creates the directory itself, securely; os.tempnam
            # was race-prone and no longer exists in Python 3.
            TMPDIR = tempfile.mkdtemp(prefix="ptgz")
            our_pdb_filename = os.path.join(TMPDIR, name)
            # Run gzip with an argument list (no shell) so that filenames
            # containing spaces or shell metacharacters are handled safely.
            try:
                with open(our_pdb_filename, "wb") as tmp_fh:
                    gzip_status = subprocess.call(
                        ["gzip", "-d", "-c", pdb_filename], stdout=tmp_fh)
            except OSError as e:
                sys.stderr.write("ERROR: could not run gzip on " +
                                 pdb_filename + ": " + str(e) + "\n")
                sys.exit(1)
            if gzip_status != 0:
                # only warn: gzip also exits non-zero for mere warnings
                sys.stderr.write("WARNING: gzip exited with status " +
                                 str(gzip_status) + " uncompressing " +
                                 pdb_filename + "\n")
        else:
            our_pdb_filename = pdb_filename

        if fortran_format and fident:
            pdbid = fident
        else:
            pdbid = name.upper()
            # e.g. PDB1ABC.ENT -> 1ABC
            if len(pdbid) >= 6 and pdbid[:3] == "PDB":
                pdbid = pdbid[3:7]
            if chainid:
                pdbid += '_' + chainid

        # parse PDB file
        pdb_parser = PDBParser()
        pdb_struct = pdb_parser.get_structure(pdbid, our_pdb_filename)
        # create the Tableaux and output them
        (tableaux_list,
         ssestr_list) = make_tableaux(our_pdb_filename, pdb_struct,
                                      secstruct_program, domain_program,
                                      include_310_helices, include_pi_helices,
                                      (use_numeric or use_old_format),
                                      sse_id_list, use_hk, min_sse_len,
                                      build_distance_matrix, chainid, domainid)
        if build_both:
            (distmatrix_list, ssestr_list) = make_tableaux(
                our_pdb_filename,
                pdb_struct,
                secstruct_program,
                domain_program,
                include_310_helices,
                include_pi_helices,
                use_numeric,
                sse_id_list,
                use_hk,
                min_sse_len,
                True,  # build_distance_matrix
                chainid,
                domainid)

        for idx, tableau in enumerate(tableaux_list):
            domain_num = idx + 1
            n = len(tableau)
            # used to permute rows/cols: null permutation. Must be a real
            # list so that random.shuffle can permute it in place.
            permutation = list(range(n))
            if do_shuffle:
                random.shuffle(permutation)  # actually permute for shuffle mode
                if verbose:
                    sys.stderr.write('permutation is: ' + str(permutation) +
                                     '\n')
                sys.stdout.write('permutation = ' +
                                 ','.join([str(x + 1)
                                           for x in permutation]) + '\n')
            if domain_num > 1:
                sys.stdout.write('\ndomain ' + str(domain_num) + ':\n')

            if fortran_format:
                sys.stdout.write("%7.7s %4d\n" % (pdbid.upper(), n))

            if use_old_format:
                if build_distance_matrix:
                    write_distmatrix_old_format(n, tableau, ssestr_list[idx])
                else:
                    write_tableau_old_format(n, tableau, ssestr_list[idx])
            else:
                write_tableau(n, tableau, permutation, use_numeric,
                              fortran_format, build_distance_matrix)

            if build_both:
                # same permutation as the tableau so the two stay consistent
                write_tableau(n, distmatrix_list[idx], permutation,
                              use_numeric, fortran_format, True)
    finally:
        if TMPDIR is not None:
            cleanup_tmpdir(TMPDIR)

    if savefilename:
        if not tableaux_list:
            sys.stderr.write('WARNING: no tableau was built, nothing saved to '
                             + savefilename + '\n')
        else:
            if verbose:
                sys.stderr.write('writing tableau to ' + savefilename + '\n')
            if len(tableaux_list) > 1:
                sys.stderr.write(
                    'WARNING: only saving first tableau in list\n')
            if build_distance_matrix or use_numeric:
                # With -d or -n, make_tableaux was asked for distance/Omega
                # matrices, so tableaux_list holds them directly (the names
                # 'distmatrix' and 'Omega' were never defined here).
                # Numeric/numpy seems to have no 'packed' format for
                # symmetric matrices, so we just dump the whole thing.
                # NOTE(review): confirm readers of the savefile expect the
                # unpacked matrix object in these modes.
                save_obj = tableaux_list[0]
            else:
                save_obj = PTTableauPacked(tableaux_list[0])
            # binary mode is what pickle expects; 'with' guarantees close
            with open(savefilename, "wb") as fh:
                pickle.dump(save_obj, fh)
def main():
    """
    main for pytableaucreate.py

    Usage: pytableaucreate [-35knefuv] [-d|-b] [-t structprog] [-p domainprog]
                [-a domainid]
                [-s sse_num_list] [-c chainid] [-m min_sse_len]
                [-i identifier] [-o savefile] <PDBfile>


    -3 specifies to include 3_10 helices in the diagram. Default is only
       alpha helices.

    -5 specifies to include pi helices in the diagram. Default is only
       alpha helices.

    -k use the HH and KK codes for respectively antiparallel and parallel
       strands in the same sheet, rather than the O, P etc. codes.

    -n output a numeric omega matrix instead of tableau.

    -e output numeric tableau angles in degrees, in the original
       TableauCreator .angles file format, with number of entries on
       first line, SSE sequence description on second line (E/H), then
       (full) matrix with angles in degrees (rather than radians).
       For distance matrix, same format with distances between SSEs
       in Angstroms.

    -f output the matrix in 'FORTRAN style' lower triangle with
       header line suitable for input to TMATN.

    -d build SSE axis midpoint distance matrix rather than tableau.

    -b build both the tableau and distance matrix and output together,
       for use with tsrchd etc. for example. If -u is used to permute
       the matrices, they are permuted the same way so they are still
       consistent.

    -p specify the domain decomposition method.
       Valid values are 'none' (default), 'ddomain', 'cath:cdf_filename'.

    -a domainid : only output specified domain

    -t specifies the secondary structure assignment program to use.
       Currently supported is 'pdb' and 'dssp' and 'stride' or 'pmml'.
       Default 'pdb'.

    -s sse_num_list specifies a comma-separated
       list of SSE sequential ids to build the
       tableau for. SSE sequential id's start at 1 and go from N to C
       terminus. E.g. -s1,5,8 includes only the 1st, 5th and 8th SSEs.
       Numbers do not restart at chains (but do restart in each domain).
       These numbers are those assigned by 'ptgraph2 -b sequential' option.

       TODO: this currently does not make sense when multiple domains
       are being processed, this option applies to each domain.

    -c chainid : specify chain identifier; only build tableau for that chain

    -m min_sse_len : minimum number of residues in SSE for it to be included

    -i identifier : when using fortran format (-f), specify the identifier
       to use in the output rather than deriving it from the filename

    -o savefile : save tableau in packed format for use in other
       programs, such as tabsearchqpml.py
       WARNING: savefile is overwritten if it exists

       TODO: this currently does not make sense when multiple domains
       are being processed, this option only saves first domain.

    -u randomly permute the rows+cols (symmetric) of the tableau/distance
       matrix. Writes the permutation vector in form
       permutation = i,j,..,m
       e.g.
       permutation = 3,1,2,4
       as first line of output before identifier information and tableau

    -v specifies verbose mode: debugging output is written to stderr.
    """
    global verbose

    # Imported locally so this function does not depend on any change to
    # the module's import block.
    import subprocess
    import tempfile

    # Every call to usage() gets the same (basename) program name.
    # NOTE(review): usage() is assumed not to return (i.e. it exits);
    # the option handling below relies on that, as the original did.
    progname = os.path.basename(sys.argv[0])

    try:
        opts, args = getopt.getopt(sys.argv[1:], "35bdfknep:a:t:s:c:m:i:o:uv?")
    except getopt.GetoptError:
        usage(progname)

    valid_secstruct_programs = ["dssp", "stride", "pdb", "pmml"]
    # -p values are matched as regular expressions (e.g. 'cath:cdf_filename')
    valid_domain_programs = getdomains.valid_domain_programs + [r"none"]
    valid_domain_programs_re = [ re.compile(re_str) for re_str in
                                 valid_domain_programs ]

    verbose = False # global (python globals are only 'global' to module though)
    secstruct_program = "pdb"
    include_310_helices = False
    include_pi_helices = False
    domain_program = "none"
    sse_id_list = None
    use_numeric = False
    use_hk = False
    savefilename = None
    min_sse_len = None
    fortran_format = False
    build_distance_matrix = False
    chainid = None
    fident = None
    do_shuffle = False
    build_both = False # both tableau and dist matrix
    use_old_format = False # size + SSE chain + degrees omega matrix
    domainid = None

    for opt,arg in opts:
        if opt == "-3":   # include 3_10 helices
            include_310_helices = True
        elif opt == "-5": # include pi helices
            include_pi_helices = True
        elif opt == "-d":  # build SSE midpoint distance matrix not tableau
            build_distance_matrix = True
        elif opt == "-b": # build both tableau and distance matrix
            build_both = True
        elif opt == "-k": # use HH and KK codes
            use_hk = True
        elif opt == "-n": # output numeric matrix not tableau
            use_numeric = True
        elif opt == "-e": # use TableauCreator .angles file format
            use_old_format = True
        elif opt == "-f":  # FORTRAN style format for TMATN
            fortran_format = True
        elif opt == "-p": # domain parsing program
            domain_program = None
            for valid_domarg_re in valid_domain_programs_re:
                if valid_domarg_re.match(arg):
                    domain_program = arg
                    break
            if domain_program is None:
                sys.stderr.write("valid values for -p are: " +
                                 str(valid_domain_programs) + "\n")
                usage(progname)
        elif opt == "-a":  # only output tableau for specified domain id
            domainid = arg
        elif opt == "-t":
            if arg not in valid_secstruct_programs:
                sys.stderr.write("valid values for -t are: " +
                                 str(valid_secstruct_programs) + "\n")
                usage(progname)
            secstruct_program = arg
        elif opt == "-s":
            sse_id_list = []
            seen_sse_ids = set() # just for checking all ids are unique
            for sse_id_str in arg.split(','):
                if sse_id_str.isdigit():
                    sse_id = int(sse_id_str)
                    if sse_id in seen_sse_ids:
                        sys.stderr.write("duplicate SSE sequential number "  +
                                         sse_id_str + "\n")
                        usage(progname)
                    seen_sse_ids.add(sse_id)
                    sse_id_list.append(sse_id)
                else:
                    sys.stderr.write("not a valid SSE sequential number '" +
                                     sse_id_str + "'\n")
                    usage(progname)
            sse_id_list.sort() # ensure SSEs are in order
        elif opt == "-c": # chain identifier
            if len(arg) != 1:
                sys.stderr.write("invalid chain identifier for -c option\n")
                usage(progname)
            chainid = arg.upper()
        elif opt == "-m": # min sse len
            try:
                min_sse_len = int(arg)
            except ValueError:
                # report a usage error instead of an uncaught traceback
                sys.stderr.write("invalid minimum SSE length for -m option\n")
                usage(progname)
        elif opt == "-i": # identifier to use for fortran format
            fident = arg
        elif opt == "-o": # save tableau in packed format
            savefilename = arg
        elif opt == "-u": # randomly permute the tableau/matrix
            do_shuffle = True
        elif opt == "-v": # verbose
            verbose = True # this module only
            ptnode_set_verbose(True) # ptnode module
            ptsecstruct.ptsecstruct_set_verbose(True) # ptsecstruct module
            ptdomain_set_verbose(True) # ptdomain module
        else: # includes -?
            usage(progname)

    if use_numeric and use_hk:
        sys.stderr.write("-n (numeric) and -k (use HH and KK codes) are "
                         "mutually exlusive\n")
        usage(progname)

    if build_distance_matrix and build_both:
        sys.stderr.write("WARNING: both -d (build dist matrix) and -b "
                         "(build both) specified, ignoring -d\n")
        build_distance_matrix = False

    if savefilename and do_shuffle:
        sys.stderr.write('WARNING: saved tableau will not be shuffled\n')

    if build_distance_matrix:
        if use_numeric:
            use_numeric = False
            sys.stderr.write("WARNING: -n (numeric) ignored for -d (distance matrix)\n")
        if use_hk:
            sys.stderr.write("-k (use HH and KK) invalid for -d (distance matrix)\n")
            usage(progname)

    if (secstruct_program == "pmml" and
        (min_sse_len is None or min_sse_len < 3)):
        sys.stderr.write("WARNING: PMML can give SSEs of length 1 or 2 causing axis fitting to fail, setting minimum length to 3 as if -m3 were specfified\n")
        min_sse_len = 3

    if fident:
        if not fortran_format:
            sys.stderr.write("-i is only valid with -f\n")
            usage(progname)
        elif len(fident) > 8:
            sys.stderr.write("identifier must be 8 chars or less\n")
            usage(progname)

    if use_old_format and (build_both or
                           use_hk or use_numeric or fortran_format or
                           do_shuffle or savefilename):
        sys.stderr.write("-e (use old .angles format) is not compatible "
                         "with -b -k or -n or -f or -u or -o\n")
        usage(progname)

    if len(args) != 1:
        usage(progname)

    pdb_filename = args[0]

    # check for compressed files. We only support gzip (.gz)
    # Note we are not using the zlib or GzipFile python modules
    # since we are calling to external programs which require the
    # file uncompressed themselves anyway so we'll just run gzip
    # to uncompress the file to a temporary directory.
    pdb_file_basename = os.path.basename(pdb_filename)
    (name,extension) = os.path.splitext(pdb_file_basename)

    TMPDIR = None # set only if a temporary directory was created
    try:
        if extension == '.gz':
            # mkdtemp creates the directory itself, securely; os.tempnam
            # was race-prone and no longer exists in Python 3.
            TMPDIR = tempfile.mkdtemp(prefix="ptgz")
            our_pdb_filename = os.path.join(TMPDIR, name)
            # Run gzip with an argument list (no shell) so that filenames
            # containing spaces or shell metacharacters are handled safely.
            try:
                with open(our_pdb_filename, "wb") as tmp_fh:
                    gzip_status = subprocess.call(
                        ["gzip", "-d", "-c", pdb_filename], stdout=tmp_fh)
            except OSError as e:
                sys.stderr.write("ERROR: could not run gzip on " +
                                 pdb_filename + ": " + str(e) + "\n")
                sys.exit(1)
            if gzip_status != 0:
                # only warn: gzip also exits non-zero for mere warnings
                sys.stderr.write("WARNING: gzip exited with status " +
                                 str(gzip_status) + " uncompressing " +
                                 pdb_filename + "\n")
        else:
            our_pdb_filename = pdb_filename

        if fortran_format and fident:
            pdbid = fident
        else:
            pdbid = name.upper()
            # e.g. PDB1ABC.ENT -> 1ABC
            if len(pdbid) >= 6 and pdbid[:3] == "PDB":
                pdbid = pdbid[3:7]
            if chainid:
                pdbid += '_' + chainid

        # parse PDB file
        pdb_parser = PDBParser()
        pdb_struct = pdb_parser.get_structure(pdbid, our_pdb_filename)
        # create the Tableaux and output them
        (tableaux_list, ssestr_list) = make_tableaux(our_pdb_filename,
                                      pdb_struct,
                                      secstruct_program,
                                      domain_program,
                                      include_310_helices,
                                      include_pi_helices,
                                      (use_numeric or use_old_format),
                                      sse_id_list,
                                      use_hk,
                                      min_sse_len,
                                      build_distance_matrix,
                                      chainid,
                                      domainid)
        if build_both:
            (distmatrix_list, ssestr_list) = make_tableaux(our_pdb_filename,
                                            pdb_struct,
                                            secstruct_program,
                                            domain_program,
                                            include_310_helices,
                                            include_pi_helices,
                                            use_numeric,
                                            sse_id_list,
                                            use_hk,
                                            min_sse_len,
                                            True, # build_distance_matrix
                                            chainid,
                                            domainid)

        for idx, tableau in enumerate(tableaux_list):
            domain_num = idx + 1
            n = len(tableau)
            # used to permute rows/cols: null permutation. Must be a real
            # list so that random.shuffle can permute it in place.
            permutation = list(range(n))
            if do_shuffle:
                random.shuffle(permutation) # actually permute for shuffle mode
                if verbose:
                    sys.stderr.write('permutation is: ' + str(permutation)+'\n')
                sys.stdout.write('permutation = ' + ','.join([str(x+1) for x in permutation]) + '\n')
            if domain_num > 1:
                sys.stdout.write('\ndomain ' + str(domain_num) + ':\n')

            if fortran_format:
                sys.stdout.write("%7.7s %4d\n" % (pdbid.upper(), n))

            if use_old_format:
                if build_distance_matrix:
                    write_distmatrix_old_format(n, tableau, ssestr_list[idx])
                else:
                    write_tableau_old_format(n, tableau, ssestr_list[idx])
            else:
                write_tableau(n, tableau, permutation, use_numeric,
                              fortran_format, build_distance_matrix)

            if build_both:
                # same permutation as the tableau so the two stay consistent
                write_tableau(n, distmatrix_list[idx],
                              permutation, use_numeric,
                              fortran_format, True)
    finally:
        if TMPDIR is not None:
            cleanup_tmpdir(TMPDIR)


    if savefilename:
        if not tableaux_list:
            sys.stderr.write('WARNING: no tableau was built, nothing saved to '
                             + savefilename + '\n')
        else:
            if verbose:
                sys.stderr.write('writing tableau to ' + savefilename +'\n')
            if len(tableaux_list) > 1:
                sys.stderr.write('WARNING: only saving first tableau in list\n')
            if build_distance_matrix or use_numeric:
                # With -d or -n, make_tableaux was asked for distance/Omega
                # matrices, so tableaux_list holds them directly (the names
                # 'distmatrix' and 'Omega' were never defined here).
                # Numeric/numpy seems to have no 'packed' format for
                # symmetric matrices, so we just dump the whole thing.
                # NOTE(review): confirm readers of the savefile expect the
                # unpacked matrix object in these modes.
                save_obj = tableaux_list[0]
            else:
                save_obj = PTTableauPacked(tableaux_list[0])
            # binary mode is what pickle expects; 'with' guarantees close
            with open(savefilename, "wb") as fh:
                pickle.dump(save_obj, fh)
def main():
    """
    main for buildtableauxdb.py

    Usage: buildtableauxdb [-35dknv] [-m min_sse_len]
             [-t structprog] [-p domainprog]
             <pdbroot> <dbname>


    WARNING: dbname is overwritten if it exists

    -3 specifies to include 3_10 helices in the diagram. Default is only
       alpha helices.

    -5 specifies to include pi helices in the diagram. Default is only
       alpha helices.

    -d build SSE distance matrices not tableaux

    -k use the HH and KK codes for respectively antiparallel and parallel
       strands in the same sheet, rather than the O, P etc. codes.

    -m min_sse_len :  specifies the minimum SSE length to include in tableaux.

    -n use numeric values (Omega matrix) rather than tableau.

    -p specify the domain decomposition method.
       Valid values are 'none' (default), 'ddomain', 'cath:cdf_filename'.

    -t specifies the secondary structure assignment program to use.
       Currently supported is 'pdb' and 'dssp' and 'stride' and 'pmml'.
       Default 'dssp'.

    -v specifies verbose mode: debugging output is written to stderr.
    """
    global verbose

    # Every call to usage() gets the same (basename) program name.
    # NOTE(review): usage() is assumed not to return (i.e. it exits);
    # the option handling below relies on that, as the original did.
    progname = os.path.basename(sys.argv[0])

    try:
        opts, args = getopt.getopt(sys.argv[1:], "35dknm:p:t:v?")
    except getopt.GetoptError:
        usage(progname)

    valid_secstruct_programs = ["dssp", "stride", "pdb", "pmml"]
    # -p values are matched as regular expressions (e.g. 'cath:cdf_filename')
    valid_domain_programs = getdomains.valid_domain_programs + [r"none"]
    valid_domain_programs_re = [re.compile(re_str) for re_str in valid_domain_programs]

    verbose = False  # global (python globals are only 'global' to module though)
    secstruct_program = "dssp"
    include_310_helices = False
    include_pi_helices = False
    use_hk = False
    domain_program = "none"
    min_sse_len = None
    use_numeric = False
    build_dist_matrices = False

    for opt, arg in opts:
        if opt == "-3":  # include 3_10 helices
            include_310_helices = True
        elif opt == "-5":  # include pi helices
            include_pi_helices = True
        elif opt == "-d":  # build distance matrices not tableaux
            build_dist_matrices = True
        elif opt == "-k":  # use HH and KK codes
            use_hk = True
        elif opt == "-m":  # min sse length
            try:
                min_sse_len = int(arg)
            except ValueError:
                # report a usage error instead of an uncaught traceback
                sys.stderr.write("invalid minimum SSE length for -m option\n")
                usage(progname)
        elif opt == "-n":  # use numeric values (Omega matrix)
            use_numeric = True
        elif opt == "-p":  # domain parsing program
            domain_program = None
            for valid_domarg_re in valid_domain_programs_re:
                if valid_domarg_re.match(arg):
                    domain_program = arg
                    break
            if domain_program is None:
                sys.stderr.write("valid values for -p are: " + str(valid_domain_programs) + "\n")
                usage(progname)
        elif opt == "-t":
            if arg not in valid_secstruct_programs:
                sys.stderr.write("valid values for -t are: " + str(valid_secstruct_programs) + "\n")
                usage(progname)
            secstruct_program = arg
        elif opt == "-v":  # verbose
            verbose = True  # this module only
            ptnode_set_verbose(True)  # ptnode module
            ptsecstruct.ptsecstruct_set_verbose(True)  # ptsecstruct module
            ptdomain_set_verbose(True)  # ptdomain module
        else:  # includes -?
            usage(progname)

    if len(args) != 2:
        usage(progname)

    if build_dist_matrices:
        if use_numeric:
            use_numeric = False
            sys.stderr.write("WARNING: -n (numeric) ignored for -d (distance matrix)\n")
        if use_hk:
            sys.stderr.write("-k (use HH and KK) invalid for -d (distance matrix)\n")
            usage(progname)

    if use_numeric and use_hk:
        sys.stderr.write("-n (numeric) and -k (use HH and KK codes) are " "mutually exlusive\n")
        usage(progname)

    # Record the version and command line options in the output log.
    sys.stdout.write(sys.argv[0] + ": version is: " + get_version() + "\n")
    sys.stdout.write(sys.argv[0] + ": options are: " + str(sys.argv[1:]) + "\n")

    input_root = args[0]
    output_filename = args[1]

    # The output file is opened before the database is built (as in the
    # original ordering) so that an unwritable path fails before any work
    # is done. Binary mode is what pickle expects, and the 'with' block
    # guarantees the file is closed even if build_db() raises.
    with open(output_filename, "wb") as fh:
        tableau_db = build_db(
            input_root,
            secstruct_program,
            domain_program,
            include_310_helices,
            include_pi_helices,
            min_sse_len,
            use_numeric,
            use_hk,
            build_dist_matrices,
        )
        if build_dist_matrices:
            sys.stdout.write("writing SSE distance matrix db to " + output_filename + "...\n")
        else:
            sys.stdout.write("writing tableaux db to " + output_filename + "...\n")
        pickle.dump(tableau_db, fh)
    sys.stdout.write("done.\n")