# Script section: finish hyperparameter optimization (-action ls), record the
# model type in the resulting .params file, then train the GraphProt model
# (-action train).
#
# Reads:  args.train_str_model, args.data_id, args.gp_output, check_cmd,
#         pos_train_fa, neg_train_fa (all bound earlier in the script).
# Writes: params_file, param_string, check_cmd, output, model_file.
# Raises: AssertionError if the .params file is missing after optimization
#         or if the training call prints nothing.

# Local import so this section stays self-contained; it can be hoisted to
# the module-level import block.
import shlex

# Restrict the optimization run to the sequence model unless a structure
# model was requested.
if not args.train_str_model:
    check_cmd += " -onlyseq"
print(check_cmd)
# The captured text of the optimization run is never inspected; success is
# judged solely by whether the expected .params file exists afterwards.
subprocess.getoutput(check_cmd)
params_file = args.data_id + ".params"
# Explicit raise instead of `assert`: assert statements are removed under
# `python -O`, which would silently disable this check. AssertionError is
# kept so the failure type seen by the caller is unchanged.
if not os.path.exists(params_file):
    raise AssertionError(
        "Hyperparameter optimization output "
        ' .params file "%s" not found' % (params_file)
    )

# Add model type to params file.
if args.train_str_model:
    gplib.echo_add_to_file("model_type: structure", params_file)
else:
    gplib.echo_add_to_file("model_type: sequence", params_file)
# Get parameter string.
param_string = gplib.graphprot_get_param_string(params_file)

# Do the model training.
print("Starting model training (-action train) ... ")
# subprocess.getoutput() runs the string through a shell, so every
# interpolated identifier/path is quoted to survive spaces and shell
# metacharacters. shlex.quote() returns plain names unchanged, so the
# command (and its printed form) is the same as before for ordinary input.
# param_string is left unquoted: presumably it holds several
# space-separated GraphProt options -- TODO confirm against gplib.
check_cmd = (
    "GraphProt.pl -action train -prefix " + shlex.quote(args.data_id)
    + " -fasta " + shlex.quote(pos_train_fa)
    + " -negfasta " + shlex.quote(neg_train_fa)
    + " " + param_string
)
print(check_cmd)
output = subprocess.getoutput(check_cmd)
# An empty result is treated as a failed call (same check as before, but
# no longer stripped under -O).
if not output:
    raise AssertionError(
        "The following call of GraphProt.pl produced no output:\n%s" % (check_cmd)
    )
if args.gp_output:
    print(output)
model_file = args.data_id + ".model"
assert (seq_id in reg_len_dic ), 'sequence ID "%s" missing in input .bed "%s"' % ( seq_id, args.genomic_sites_bed, ) reg_l = reg_len_dic[seq_id] assert ( seq_l == reg_l ), "sequence length differs from .bed region length (%i != %i)" % ( seq_l, reg_l, ) # Read in model parameters. param_dic = gplib.graphprot_get_param_dic(args.in_params) # Create GraphProt parameter string. param_string = gplib.graphprot_get_param_string(args.in_params) """ Run predictions. """ if args.ws_pred: # Do whole site prediction. print("Starting whole site predictions on input .fa file" " (-action predict) ... ") check_cmd = ("GraphProt.pl -action predict -prefix " + args.data_id + " -fasta " + args.in_fa + " " + param_string + " -model " + args.in_model) output = subprocess.getoutput(check_cmd) assert ( output ), "the following call of GraphProt.pl " "produced no output:\n%s" % (