Example #1
0
def main():
    """Command line entry point: run the CCMpred workflow and exit.

    Parses the command line options, prepares a ``CCMpred`` instance
    (file paths, alignment, sequence weights, frequencies and, if given, a
    PDB file) and then either

    * writes an alternative score (OMES or mutual information) and exits
      with status 0, or
    * sets up regularization and potentials, optionally minimizes the
      objective function, and writes the requested output files.

    This function always terminates through ``sys.exit``. The exit status
    is 0 unless optimization was requested and ``ccm.algret['code']`` is
    negative, in which case the negated code is used.
    """
    opt = parse_args()

    # Optional banner.
    if opt.logo:
        ccmpred.logo.logo()

    # Environment variables must be strings, hence the explicit conversion.
    os.environ['OMP_NUM_THREADS'] = str(opt.num_threads)
    thread_msg = "Using {0} threads for OMP parallelization."
    print(thread_msg.format(os.environ["OMP_NUM_THREADS"]))

    ccm = CCMpred()

    # Register every file path the run may use.
    ccm.set_alignment_file(opt.alnfile)
    ccm.set_matfile(opt.matfile)
    ccm.set_pdb_file(opt.pdbfile)
    ccm.set_initraw_file(opt.initrawfile)

    # Load the alignment; the gap thresholds are forwarded so that heavily
    # gapped sequences and positions can be removed.
    ccm.read_alignment(opt.aln_format, opt.max_gap_pos, opt.max_gap_seq)

    # Sequence weights (meant to reduce sampling bias).
    ccm.compute_sequence_weights(opt.weight, opt.wt_cutoff)

    # Amino acid counts and frequencies, including pseudo counts.
    ccm.compute_frequencies(
        opt.pseudocounts, opt.pseudocount_single, opt.pseudocount_pair)

    # A PDB file is only read when one was given (constrained run).
    if opt.pdbfile:
        ccm.read_pdb(opt.contact_threshold)

    # Alternative scores short-circuit the run: compute, write, exit.
    if opt.omes:
        ccm.compute_omes(opt.omes_fodoraldrich)
        ccm.write_matrix()
        sys.exit(0)

    if opt.mi:
        ccm.compute_mutual_info(opt.mi_normalized, opt.mi_pseudocounts)
        ccm.write_matrix()
        sys.exit(0)

    # L2 regularization with "L" scaling.
    ccm.specify_regularization(
        opt.lambda_single,
        opt.lambda_pair_factor,
        reg_type="L2",
        scaling="L",
        single_prior=opt.single_prior)

    # Initialise single and pair potentials, either from the regularization
    # priors or from the init raw file registered above.
    ccm.intialise_potentials()

    if not opt.optimize:
        skip_msg = "\nDo not optimize but use model parameters provided by {0}\n"
        print(skip_msg.format(opt.initrawfile))
    else:
        # Set up logging, then minimize the objective function with the
        # optimization algorithm selected through the options.
        ccm.initiate_logging(opt.plot_opt_progress)
        ccm.minimize(opt)

    # ---- Post processing ----

    # Contact matrices are only computed and written when a matrix file
    # was requested.
    if opt.matfile:
        # Contact score: frobenius norm on recentered potentials.
        # TODO: other scores can be added ...
        ccm.compute_contact_matrix(recenter_potentials=True, frob=True)

        # Corrected contact maps (entropy / phylogenetic bias removal).
        # TODO: other corrections can be added ...
        ccm.compute_correction(
            apc_file=opt.apc_file,
            entropy_correction_file=opt.entropy_correction_file)

        ccm.write_matrix()

    # Model parameters in binary format, if requested.
    if opt.out_binary_raw_file:
        ccm.write_binary_raw(opt.out_binary_raw_file)

    # A negative optimizer return code becomes a positive exit status.
    exitcode = 0
    if opt.optimize and ccm.algret['code'] < 0:
        exitcode = -ccm.algret['code']
    sys.exit(exitcode)
def main():
    """Command line entry point: run the CCMpred workflow and exit.

    Parses the command line options, builds a ``CCMpred`` instance from the
    alignment and matrix file options, and prepares alignment, sequence
    weights and frequencies. It then either

    * writes an alternative score (OMES or mutual information) and exits
      with status 0, or
    * sets up regularization and potentials, optionally minimizes the
      selected objective function with the selected algorithm, computes the
      corrected contact matrix and writes the output files.

    This function always terminates through ``sys.exit``. The exit status
    is 0 unless optimization was requested and ``ccm.algret['code']`` is
    negative, in which case the negated code is used.
    """
    opt = parse_args()

    # Optional banner.
    if opt.logo:
        ccmpred.logo.logo()

    # Environment variables must be strings, hence the explicit conversion.
    os.environ['OMP_NUM_THREADS'] = str(opt.num_threads)
    thread_msg = "Using {0} threads for OMP parallelization."
    print(thread_msg.format(os.environ["OMP_NUM_THREADS"]))

    ccm = CCMpred(opt.alnfile, opt.matfile)

    # ---- Setup ----

    # Alignment, sequence weights, then amino acid counts and frequencies.
    ccm.read_alignment(opt.aln_format, opt.max_gap_ratio)
    ccm.compute_sequence_weights(opt.weight, opt.wt_ignore_gaps, opt.wt_cutoff)
    ccm.compute_frequencies(
        opt.pseudocounts, opt.pseudocount_single, opt.pseudocount_pair)

    # Alternative scores short-circuit the run: compute, write, exit.
    if opt.omes:
        ccm.compute_omes(opt.omes_fodoraldrich)
        ccm.write_matrix()
        sys.exit(0)

    if opt.mi:
        ccm.compute_mutual_info(opt.mi_normalized, opt.mi_pseudocounts)
        ccm.write_matrix()
        sys.exit(0)

    # Regularization; type and scaling are taken from the options.
    ccm.specify_regularization(
        opt.lambda_single,
        opt.lambda_pair_factor,
        reg_type=opt.reg_type,
        scaling=opt.scaling)

    # Initialise single and pair potentials, either from the regularization
    # priors or from the given init raw file.
    ccm.intialise_potentials(opt.initrawfile)

    # ---- Optimization ----

    if not opt.optimize:
        skip_msg = "\nDo not optimize but load couplings from binary raw file {0}\n"
        print(skip_msg.format(opt.initrawfile))
    else:
        # Look up the objective function and the optimizer by their option
        # names, then minimize.
        objective = OBJ_FUNC[opt.objfun](opt, ccm)
        optimizer = ALGORITHMS[opt.algorithm](opt, ccm)
        ccm.minimize(objective, optimizer)

    # ---- Post processing ----

    # Contact score; recentering and frobenius norm are option controlled.
    # TODO: other scores can be added ...
    ccm.compute_contact_matrix(
        recenter_potentials=opt.centering_potentials, frob=opt.frob)

    # Bias correction of the contact score.
    ccm.compute_correction(
        apc=opt.apc, entropy_correction=opt.entropy_correction)

    # Write the (corrected) contact matrices.
    ccm.write_matrix()

    # The sampled alignment is only written when an output file was given
    # and ``ccm.f`` actually carries an ``msa_sampled`` attribute.
    if opt.cd_alnfile and hasattr(ccm.f, 'msa_sampled'):
        ccm.write_sampled_alignment(opt.cd_alnfile)

    # Model parameters in binary format, if requested.
    if opt.out_binary_raw_file:
        ccm.write_binary_raw(opt.out_binary_raw_file)

    # A negative optimizer return code becomes a positive exit status.
    exitcode = 0
    if opt.optimize and ccm.algret['code'] < 0:
        exitcode = -ccm.algret['code']
    sys.exit(exitcode)