Example #1
0
def test_predict(version, clss, exp_val):
    """ Pretty much a repeat of test_rcds. """

    with open(model_filepath(version), "rb") as handle:
        model = Model.read(handle)

    required_cols = list(model.hmm_lengths.keys())

    files = test_files(version)
    with open(files["hmmer_text"], "r") as handle:
        parsed = HMMER.from_file(
            handle,
            model.hmm_lengths,
            "hmmer3-text",
        )
        counts = cazy_counts_multi([parsed], ["test"], required_cols)

    pred = model.predict(counts)
    rcds = pred.rcd

    rcds_dict = dict()
    for res in rcds:
        assert res.label == "test"
        rcds_dict[(res.label, res.nomenclature,
                   res.nomenclature_class)] = res.value

    # Checks that results are accurate to 5 decimal places.
    assert_almost_equal(
        exp_val,
        rcds_dict[("test", "nomenclature3", clss)],
        decimal=5
    )
    return
Example #2
0
def test_parse_hmmer_text_output(version, idx, col, exp_val):

    with open(model_filepath(version), "rb") as handle:
        model = Model.read(handle)

    files = test_files(version)
    with open(files["hmmer_text"], "r") as handle:
        sample = list(
            HMMER.from_file(
                handle,
                model.hmm_lengths,
                "hmmer3-text",
            ))

    assert getattr(sample[idx], col) == exp_val
    return
Example #3
0
def test_cazy_counts(version, hmm, exp_val):
    with open(model_filepath(version), "rb") as handle:
        model = Model.read(handle)

    required_cols = list(model.hmm_lengths.keys())

    files = test_files(version)
    with open(files["hmmer_text"], "r") as handle:
        parsed = HMMER.from_file(
            handle,
            model.hmm_lengths,
            "hmmer3-text",
        )
        counts = cazy_counts(parsed, required_cols)

    column_index = required_cols.index(hmm)
    assert counts[column_index] == exp_val
    return
Example #4
0
def test_pca(version, pc, exp_val):

    with open(model_filepath(version), "rb") as handle:
        model = Model.read(handle)

    required_cols = list(model.hmm_lengths.keys())

    files = test_files(version)
    with open(files["hmmer_text"], "r") as handle:
        parsed = HMMER.from_file(
            handle,
            model.hmm_lengths,
            "hmmer3-text",
        )
        counts = cazy_counts_multi([parsed], ["test"], required_cols)

    pred = model.predict(counts)
    trans = pred.pca

    # Checks that results are accurate to 5 decimal places.
    column_index = trans.columns.index(pc)
    assert_almost_equal(exp_val, trans.arr[0, column_index], decimal=5)
    return
Example #5
0
def main():  # noqa
    """ The cli interface to CATAStrophy. """

    try:
        args = cli(prog=sys.argv[0], args=sys.argv[1:])
    except MyArgumentError as e:
        print(e.message, file=sys.stderr)
        sys.exit(e.errno)

    infile_names = [f.name for f in args.infile]

    if args.labels is None:
        labels = infile_names
    elif len(args.labels) != len(args.infile):
        msg = ("argument labels and inhandles: \n"
               "When specified, the number of labels must be the same as the "
               "number of input files. Exiting.\n")

        print(msg, file=sys.stderr)
        sys.exit(EXIT_CLI)
    else:
        labels = args.labels

    if args.model_file is not None:
        model = Model.read(args.model_file)
    else:
        with open(data.model_filepath(args.model_version), "rb") as handle:
            model = Model.read(handle)

    try:
        runner(
            args.infile,
            args.outhandle,
            args.pca_handle,
            args.counts_filepath,
            labels,
            args.file_format,
            model,
        )

    except ParseError as e:
        if e.line is not None:
            header = "Failed to parse file <{}> at line {}.\n".format(
                e.filename, e.line)
        else:
            header = "Failed to parse file <{}>.\n".format(e.filename)

        print("{}\n{}".format(header, e.message), file=sys.stderr)
        sys.exit(EXIT_INPUT_FORMAT)

    except HMMError as e:
        msg = ("Encountered an hmm that wasn't present in the training data.\n"
               f"Offending HMMs were: {', '.join(e.hmms)}")
        print(msg, file=sys.stderr)
        sys.exit(EXIT_INPUT_FORMAT)
        pass

    except OSError as e:
        msg = (
            "Encountered a system error.\n"
            "We can't control these, and they're usually related to your OS.\n"
            "Try running again.\n")
        print(msg, file=sys.stderr)
        print(e.strerror, file=sys.stderr)
        sys.exit(EXIT_SYSERR)

    except MemoryError:
        msg = ("Ran out of memory!\n"
               "Catastrophy shouldn't use much RAM, so check other "
               "processes and try running again.")
        print(msg, file=sys.stderr)
        sys.exit(EXIT_SYSERR)

    except KeyboardInterrupt:
        print("Received keyboard interrupt. Exiting.", file=sys.stderr)
        sys.exit(EXIT_KEYBOARD)

    except Exception as e:
        msg = (
            "I'm so sorry, but we've encountered an unexpected error.\n"
            "This shouldn't happen, so please file a bug report with the "
            "authors.\nWe will be extremely grateful!\n\n"
            "You can email us at {}.\n"
            "Alternatively, you can file the issue directly on the repo "
            "<https://bitbucket.org/ccdm-curtin/catastrophy/issues>\n\n"
            "Please attach a copy of the following message:").format(__email__)
        print(e, file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        sys.exit(EXIT_UNKNOWN)

    return