Beispiel #1
0
    print("# negative .fa sequences:   %i" % c_neg_fa)
    # The optional FASTA inputs (args.opt_pos_fa / args.opt_neg_fa) are only
    # accepted as a pair: giving one without the other is an error.
    # NOTE(review): assert statements are skipped when Python runs with -O,
    # so these checks silently disappear in optimized mode.
    assert not args.opt_pos_fa or args.opt_neg_fa, \
        "--opt-pos but no --opt-neg given"
    assert not args.opt_neg_fa or args.opt_pos_fa, \
        "--opt-neg but no --opt-pos given"
    # Lowercase-only sequences cause GP to crash, so every FASTA file that
    # was supplied is run through the format check before continuing.
    error_mess = (
        "input sequences encountered containing "
        "only lowercase characters or lowercase characters in between "
        "uppercase characters. Please provide either all uppercase "
        "sequences or sequences containing uppercase regions surrounded "
        "by lowercase context regions for structure calculation (see "
        "viewpoint concept in original GraphProt publication "
        "for more details)")
    # Positive and negative sets are always checked; the optional sets are
    # appended only when they were given.
    lc_check_fa_files = [args.in_pos_fa, args.in_neg_fa]
    if args.opt_pos_fa:
        lc_check_fa_files.append(args.opt_pos_fa)
    if args.opt_neg_fa:
        lc_check_fa_files.append(args.opt_neg_fa)
    # Files are processed in the order listed above; after the loop,
    # seqs_dic and bad_ids hold the values for the last file checked.
    for lc_check_fa in lc_check_fa_files:
        seqs_dic = gplib.read_fasta_into_dic(lc_check_fa)
        bad_ids = gplib.check_seqs_dic_format(seqs_dic)
        assert not bad_ids, error_mess

    # If parop .fa files given.
    # Environment requirements: Linux platform and GraphProt.pl available
    # (as reported by gplib.is_tool).
    assert "linux" in sys.platform, "please use Linux"
    assert gplib.is_tool("GraphProt.pl"), "GraphProt.pl not in PATH"
    # Every input file has to exist; each path is paired with the message
    # template reported when it is missing.
    required_inputs = (
        (args.in_fa, 'input .fa file "%s" not found'),
        (args.in_model, 'input .model file "%s" not found'),
        (args.in_params, 'input .params file "%s" not found'),
    )
    for required_path, not_found_fmt in required_inputs:
        assert os.path.exists(required_path), not_found_fmt % (required_path)
    # The input FASTA must contain at least one header; report the count.
    c_in_fa = gplib.count_fasta_headers(args.in_fa)
    assert c_in_fa, 'input .fa file "%s" no headers found' % args.in_fa
    print("# input .fa sequences:   %i" % c_in_fa)
    # Message reported for lowercase-only sequences, which cause GP to crash.
    error_mess = (
        "input sequences encountered containing "
        "only lowercase characters or lowercase characters in between "
        "uppercase characters. Please provide either all uppercase "
        "sequences or sequences containing uppercase regions surrounded "
        "by lowercase context regions for structure calculation (see "
        "viewpoint concept in original GraphProt publication "
        "for more details)")
    # Load the input sequences so their upper-/lowercase layout can be
    # inspected.
    seqs_dic = gplib.read_fasta_into_dic(args.in_fa)
    # The format check is applied only when args.ws_pred is set; bad_ids is
    # not bound otherwise.
    if args.ws_pred:
        bad_ids = gplib.check_seqs_dic_format(seqs_dic)
        assert not bad_ids, error_mess

    c_uc_nt = gplib.seqs_dic_count_uc_nts(seqs_dic)
    assert c_uc_nt, ("no uppercase nucleotides in input .fa sequences. "