Python reference_index 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: idepi.util

메소드/함수: reference_index

hotexamples.com에서의 예제들: 4

Python reference_index - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python idepi.util.reference_index의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질 향상에 도움이 됩니다.

예제 #1

파일 보기

파일: _phylo.py 프로젝트: stevenweaver/idepi

def main(args=None):
    """Generate a phylogeny from an alignment file and write it out.

    The alignment is loaded with ``load_stockholm``, the record at the
    index returned by ``reference_index`` is left out, and the remaining
    records are handed to ``Phylo``. The resulting tree and alignment are
    written together with the feature names via ``PhyloGzFile.write``.

    Args:
        args: Command-line arguments without the program name. When
            ``None``, ``sys_argv[1:]`` is used instead.

    Returns:
        ``0`` after the output has been written.

    Raises:
        RuntimeError: If ``reference_index`` raises ``IndexError``
            (reported as 'No reference sequence found!').
    """
    argv = sys_argv[1:] if args is None else args

    cli = ArgumentParser(
        description='Generate a phylogeny from an alignment.')
    # Both positionals share the same converter, so register them in a loop.
    for positional in ('ALIGNMENT', 'OUTPUT'):
        cli.add_argument(positional, type=PathType)
    options = cli.parse_args(argv)

    msa = load_stockholm(options.ALIGNMENT)

    try:
        refidx = reference_index(msa, is_refseq)
    except IndexError:
        raise RuntimeError('No reference sequence found!')

    # Feature names are taken from the full alignment, reference included.
    vectorizer = MSAVectorizer(AminoEncoder).fit(msa)
    feature_names = vectorizer.get_feature_names()

    # Keep every record except the one sitting at the reference index.
    non_reference = []
    for position, record in enumerate(msa):
        if position == refidx:
            continue
        non_reference.append(record)

    build_phylogeny = Phylo()
    tree, alignment = build_phylogeny(non_reference)

    PhyloGzFile.write(options.OUTPUT, tree, alignment, feature_names)

    return 0

예제 #2

파일 보기

파일: _phylo.py 프로젝트: nlhepler/idepi

def main(args=None):
    """Generate a phylogeny from an alignment file and write it out.

    The alignment is loaded with ``load_stockholm``, the record at the
    index returned by ``reference_index`` is left out, and the remaining
    records are handed to ``Phylo``. The resulting tree and alignment are
    written together with the feature names via ``PhyloGzFile.write``.

    Args:
        args: Command-line arguments without the program name. When
            ``None``, ``sys_argv[1:]`` is used instead.

    Returns:
        ``0`` after the output has been written.

    Raises:
        RuntimeError: If ``reference_index`` raises ``IndexError``
            (reported as 'No reference sequence found!').
    """
    argv = sys_argv[1:] if args is None else args

    cli = ArgumentParser(description='Generate a phylogeny from an alignment.')
    # Both positionals share the same converter, so register them in a loop.
    for positional in ('ALIGNMENT', 'OUTPUT'):
        cli.add_argument(positional, type=PathType)
    options = cli.parse_args(argv)

    msa = load_stockholm(options.ALIGNMENT)

    try:
        refidx = reference_index(msa, is_refseq)
    except IndexError:
        raise RuntimeError('No reference sequence found!')

    # Feature names are taken from the full alignment, reference included.
    vectorizer = SiteVectorizer(AminoEncoder).fit(msa)
    feature_names = vectorizer.get_feature_names()

    # Keep every record except the one sitting at the reference index.
    non_reference = []
    for position, record in enumerate(msa):
        if position == refidx:
            continue
        non_reference.append(record)

    build_phylogeny = Phylo()
    tree, alignment = build_phylogeny(non_reference)

    PhyloGzFile.write(options.OUTPUT, tree, alignment, feature_names)

    return 0

예제 #3

파일 보기

파일: _discrete.py 프로젝트: nlhepler/idepi

def test_discrete(ARGS):
    """Check feature extraction and mRMR fitting on the amino test alignment.

    ``TEST_AMINO_STO`` is written to a temporary file and read back as a
    Stockholm alignment. For each of ``AminoEncoder`` and ``StanfelEncoder``
    the alignment is labeled, vectorized with ``SiteVectorizer``, and the
    resulting feature names, feature matrix and labels are compared with the
    expected module-level constants. Finally an ``MRMR`` selector is fitted
    on the extracted data. The temporary file is always removed.

    Args:
        ARGS: Options namespace. ``NUM_FEATURES``, ``MRMR_METHOD``,
            ``MAX_CONSERVATION``, ``MAX_GAP_RATIO``, ``MIN_CONSERVATION``,
            ``CUTOFF`` and ``ENCODER`` are overwritten here;
            ``REFSEQ_IDS``, ``MRMR_NORMALIZE`` and ``SIMILAR`` are read.

    Raises:
        AssertionError: If the generated feature names, feature matrix or
            labels differ from the expected values.
    """
    # set these to this so we don't exclude anything (just testing file generation and parsing)
    ARGS.NUM_FEATURES = 15  # should be enough, the number is known to be 13
    ARGS.MRMR_METHOD = 'MID'
    ARGS.MAX_CONSERVATION = 1.0
    ARGS.MAX_GAP_RATIO = 1.0
    ARGS.MIN_CONSERVATION = 1.0
    ARGS.CUTOFF = 20.

    # Original author's warning: skipping this call breaks things ("DOOMBUNNIES").
    set_util_params(ARGS.REFSEQ_IDS)

    # mkstemp hands back an open descriptor; close it and reopen by name below.
    fd, sto_filename = mkstemp()
    close(fd)

    try:
        # The context manager closes the handle even if the write fails.
        with open(sto_filename, 'w') as fh:
            print(TEST_AMINO_STO, file=fh)

        alignment = AlignIO.read(sto_filename, 'stockholm')

        # The loop target is an attribute, so ARGS.ENCODER is updated on
        # every pass and is visible to anything that reads ARGS.
        for ARGS.ENCODER in (AminoEncoder, StanfelEncoder):

            if ARGS.ENCODER == StanfelEncoder:
                TEST_NAMES = TEST_STANFEL_NAMES
                TEST_X = TEST_STANFEL_X
            else:
                TEST_NAMES = TEST_AMINO_NAMES
                TEST_X = TEST_AMINO_X

            # test mRMR and LSVM file generation
            ylabeler = Labeler(
                seqrecord_get_values,
                lambda row: is_refseq(row) or False,  # TODO: again filtration function
                lambda x: x > ARGS.CUTOFF,
                False
            )
            # The third returned value is not needed by this test.
            alignment, y, _ic50 = ylabeler(alignment)

            refidx = reference_index(alignment, is_refseq)
            alignment = LabeledMSA.from_msa_with_ref(alignment, refidx)
            extractor = SiteVectorizer(ARGS.ENCODER)
            x = extractor.fit_transform(alignment)
            colnames = extractor.get_feature_names()

            # test the feature names portion
            assert len(colnames) == len(TEST_NAMES), (
                'gen:   %s\ntruth: %s' % (colnames, TEST_NAMES))

            for name in TEST_NAMES:
                assert name in colnames, (
                    'ERROR: \'%s\' not found in %s' % (name, ', '.join(colnames)))

            assert np.all(TEST_X == x)

            assert np.all(TEST_Y == y)

            # generate and test the mRMR portion
            mrmr = MRMR(
                estimator=SVC(kernel='linear'),
                n_features_to_select=ARGS.NUM_FEATURES,
                method=ARGS.MRMR_METHOD,
                normalize=ARGS.MRMR_NORMALIZE,
                similar=ARGS.SIMILAR
                )

            mrmr.fit(x, y)

    finally:
        remove(sto_filename)

    print('ALL TESTS PASS', file=sys.stderr)

예제 #4

파일 보기

파일: _discrete.py 프로젝트: stevenweaver/idepi

def test_discrete(ARGS):
    """Check feature extraction and mRMR fitting on the amino test alignment.

    ``TEST_AMINO_STO`` is written to a temporary file and read back as a
    Stockholm alignment. For each of ``AminoEncoder`` and ``StanfelEncoder``
    the alignment is labeled, vectorized with ``MSAVectorizer``, and the
    resulting feature names, feature matrix and labels are compared with the
    expected module-level constants. Finally an ``MRMR`` selector is fitted
    on the extracted data. The temporary file is always removed.

    Args:
        ARGS: Options namespace. ``NUM_FEATURES``, ``MRMR_METHOD``,
            ``MAX_CONSERVATION``, ``MAX_GAP_RATIO``, ``MIN_CONSERVATION``,
            ``CUTOFF`` and ``ENCODER`` are overwritten here;
            ``REFSEQ_IDS``, ``MRMR_NORMALIZE`` and ``SIMILAR`` are read.

    Raises:
        AssertionError: If the generated feature names, feature matrix or
            labels differ from the expected values.
    """
    # set these to this so we don't exclude anything (just testing file generation and parsing)
    ARGS.NUM_FEATURES = 15  # should be enough, the number is known to be 13
    ARGS.MRMR_METHOD = 'MID'
    ARGS.MAX_CONSERVATION = 1.0
    ARGS.MAX_GAP_RATIO = 1.0
    ARGS.MIN_CONSERVATION = 1.0
    ARGS.CUTOFF = 20.

    # Original author's warning: skipping this call breaks things ("DOOMBUNNIES").
    set_util_params(ARGS.REFSEQ_IDS)

    # mkstemp hands back an open descriptor; close it and reopen by name below.
    fd, sto_filename = mkstemp()
    close(fd)

    try:
        # The context manager closes the handle even if the write fails.
        with open(sto_filename, 'w') as fh:
            print(TEST_AMINO_STO, file=fh)

        alignment = AlignIO.read(sto_filename, 'stockholm')

        # The loop target is an attribute, so ARGS.ENCODER is updated on
        # every pass and is visible to anything that reads ARGS.
        for ARGS.ENCODER in (AminoEncoder, StanfelEncoder):

            if ARGS.ENCODER == StanfelEncoder:
                TEST_NAMES = TEST_STANFEL_NAMES
                TEST_X = TEST_STANFEL_X
            else:
                TEST_NAMES = TEST_AMINO_NAMES
                TEST_X = TEST_AMINO_X

            # test mRMR and LSVM file generation
            ylabeler = Labeler(
                seqrecord_get_values,
                lambda row: is_refseq(row) or False,  # TODO: again filtration function
                lambda x: x > ARGS.CUTOFF,
                False
            )
            # The third returned value is not needed by this test.
            alignment, y, _ic50 = ylabeler(alignment)

            refidx = reference_index(alignment, is_refseq)
            alignment = LabeledMSA.from_msa_with_ref(alignment, refidx)
            extractor = MSAVectorizer(ARGS.ENCODER)
            x = extractor.fit_transform(alignment)
            colnames = extractor.get_feature_names()

            # test the feature names portion
            assert len(colnames) == len(TEST_NAMES), (
                'gen:   %s\ntruth: %s' % (colnames, TEST_NAMES))

            for name in TEST_NAMES:
                assert name in colnames, (
                    'ERROR: \'%s\' not found in %s' % (name, ', '.join(colnames)))

            assert np.all(TEST_X == x)

            assert np.all(TEST_Y == y)

            # generate and test the mRMR portion
            mrmr = MRMR(
                estimator=SVC(kernel='linear'),
                n_features_to_select=ARGS.NUM_FEATURES,
                method=ARGS.MRMR_METHOD,
                normalize=ARGS.MRMR_NORMALIZE,
                similar=ARGS.SIMILAR
                )

            mrmr.fit(x, y)

    finally:
        remove(sto_filename)

    print('ALL TESTS PASS', file=sys.stderr)