Python get_splitの例

プログラミング言語: Python

名前空間/パッケージ名: schemes.split_df

メソッド/関数: get_split

hotexamples.comのコード掲載数: 4

Python get_split - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのschemes.split_df.get_splitの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: test_schemes_C.py プロジェクト: wg12385/selection_schemes

def test_scheme_C2():
    """Exercise selection scheme C2 over a whole learning curve.

    The scheme is called repeatedly, growing ``chosen`` by ``num`` molecules
    per iteration. After every iteration the train/test split returned by
    ``get_split`` is checked for size, uniqueness and disjointness, and each
    train molecule must contain at least as many ``conn`` entries equal to 2
    as every test molecule (presumably double bonds -- confirm against the
    scheme definition).
    """
    # Need to test it as a learning curve, not just pick one
    num = 3

    atom_df = pd.read_pickle('tests/test_mols/atoms.pkl')
    pair_df = pd.read_pickle('tests/test_mols/pairs.pkl')
    mol_df, graphs = graphin.make_graph_df(atom_df, pair_df)

    total = len(graphs)
    counter = 0
    chosen = []
    while len(chosen) < len(graphs) - num:

        # Iterations continue until there are not enough graphs to select
        # Each iteration we want (num*n) graphs in train, and (total-(num*n)) graphs in test
        # Make the selection the same each time, we just want to select chosen from the df
        # We then just grow chosen each time using the selection scheme

        counter += 1
        chosen = select_molecules_C2(mol_df,
                                     atom_df,
                                     pair_df,
                                     prev_chosen=chosen,
                                     num=num)
        train_graphs, train_mol_df, test_graphs, test_mol_df = get_split(
            chosen, mol_df, graphs)

        # Split sizes must follow the learning curve exactly.
        assert len(train_graphs) == num * counter
        assert len(test_graphs) == total - (num * counter)
        # No molecule may appear twice within either side of the split.
        assert len(train_mol_df.molecule_name.unique()) == len(
            train_mol_df.molecule_name)
        assert len(test_mol_df.molecule_name.unique()) == len(
            test_mol_df.molecule_name)
        assert counter < total / num

        # Train and test sets must be disjoint.
        test_names = test_mol_df.molecule_name.unique()
        for molname in train_mol_df.molecule_name.unique():
            assert molname not in test_names

        for molname1 in train_mol_df.molecule_name:
            mol_df1 = atom_df.loc[(atom_df.molecule_name == molname1)]["conn"]
            # The train count does not depend on the test molecule, so it is
            # computed once per train molecule.
            count1 = [bond for conn in mol_df1.values
                      for bond in conn].count(2)
            for molname2 in test_mol_df.molecule_name:
                # Look up the *test* molecule here; filtering on molname1
                # would compare the train molecule with itself and make the
                # assertion below trivially true.
                mol_df2 = atom_df.loc[(
                    atom_df.molecule_name == molname2)]["conn"]
                count2 = [bond for conn in mol_df2.values
                          for bond in conn].count(2)
                assert count1 >= count2

コード例 #2

ファイルを表示

def test_scheme_D6():
    """Run selection scheme D6 as a learning curve and validate each split.

    Every pass adds ``num`` molecules to the selection. The resulting
    train/test split is checked for its sizes and for unique molecule names,
    and every train molecule must have at least as many atoms whose
    ``typestr`` lies outside H, C, N, O, F as every test molecule.
    """
    num = 3

    atom_df = pd.read_pickle('tests/test_mols/atoms.pkl')
    pair_df = pd.read_pickle('tests/test_mols/pairs.pkl')
    mol_df, graphs = graphin.make_graph_df(atom_df, pair_df)

    # Atom types that do not count towards the per-molecule tally.
    ordinary_types = ['H', 'C', 'N', 'O', 'F']

    n_graphs = len(graphs)
    step = 0
    chosen = []
    while len(graphs) - num > len(chosen):
        # Keep going while another full batch of `num` graphs can be drawn.
        # After step n the train side holds num*n graphs and the test side
        # the remaining n_graphs - num*n; the selection is grown from the
        # previous one on every pass.
        step += 1

        chosen = select_molecules_D6(
            mol_df, atom_df, pair_df, prev_chosen=chosen, num=num)
        split = get_split(chosen, mol_df, graphs)
        train_graphs, train_mol_df, test_graphs, test_mol_df = split

        expected_train = num * step
        assert len(train_graphs) == expected_train
        assert len(test_graphs) == n_graphs - expected_train

        train_names = train_mol_df.molecule_name
        test_names = test_mol_df.molecule_name
        assert len(train_names.unique()) == len(train_names)
        assert len(test_names.unique()) == len(test_names)
        assert step < n_graphs / num

        for train_name in train_names:
            train_types = atom_df.loc[
                atom_df.molecule_name == train_name]["typestr"]
            train_count = sum(
                1 for atom_type in train_types.values
                if atom_type not in ordinary_types)
            for test_name in test_names:
                test_types = atom_df.loc[
                    atom_df.molecule_name == test_name]["typestr"]
                test_count = sum(
                    1 for atom_type in test_types.values
                    if atom_type not in ordinary_types)
                assert train_count >= test_count

コード例 #3

ファイルを表示

def test_scheme_B2():
    """Run selection scheme B2 as a learning curve and validate each split.

    The scheme is applied repeatedly rather than once, adding ``num``
    molecules per pass. Each split must have the expected sizes, unique and
    disjoint molecule names, and no train graph may have more nodes than any
    test graph.
    """
    num = 3

    atom_df = pd.read_pickle('tests/test_mols/atoms.pkl')
    pair_df = pd.read_pickle('tests/test_mols/pairs.pkl')
    mol_df, graphs = graphin.make_graph_df(atom_df, pair_df)

    n_graphs = len(graphs)
    step = 0
    chosen = []
    while len(graphs) - num > len(chosen):
        # Keep going while another full batch of `num` graphs can be drawn.
        # After step n the train side holds num*n graphs and the test side
        # the remaining n_graphs - num*n; the selection is grown from the
        # previous one on every pass.
        step += 1

        chosen = select_molecules_B2(
            mol_df, atom_df, pair_df, prev_chosen=chosen, num=num)
        split = get_split(chosen, mol_df, graphs)
        train_graphs, train_mol_df, test_graphs, test_mol_df = split

        expected_train = num * step
        assert len(train_graphs) == expected_train
        assert len(test_graphs) == n_graphs - expected_train

        unique_train = train_mol_df.molecule_name.unique()
        unique_test = test_mol_df.molecule_name.unique()
        assert len(unique_train) == len(train_mol_df.molecule_name)
        assert len(unique_test) == len(test_mol_df.molecule_name)
        assert step < n_graphs / num

        # The two sides of the split must not share a molecule.
        for train_name in unique_train:
            assert train_name not in unique_test

        # Every train graph is at most as large as every test graph.
        for train_graph in train_graphs:
            for test_graph in test_graphs:
                assert (train_graph.number_of_nodes()
                        <= test_graph.number_of_nodes())

コード例 #4

ファイルを表示

def test_scheme_I7():
    """Exercise selection scheme I7 over a whole learning curve.

    Before every selection the atom dataframe is passed through
    ``add_randomised_FEPs`` and ``add_randomised_FEP_vars``. Each resulting
    train/test split is checked for size, uniqueness and disjointness.
    Finally the test asserts that exactly two iterations ran and that the
    selection covers the largest multiple of ``num`` not exceeding the number
    of graphs (this relies on the size of the pickled fixtures).
    """
    num = 3

    atom_df = pd.read_pickle('tests/test_mols/atoms.pkl')
    pair_df = pd.read_pickle('tests/test_mols/pairs.pkl')
    mol_df, graphs = graphin.make_graph_df(atom_df, pair_df)

    total = len(graphs)
    counter = 0
    chosen = []
    while len(chosen) < len(graphs) - num:

        atom_df = add_randomised_FEPs(atom_df)
        atom_df = add_randomised_FEP_vars(atom_df)

        counter += 1
        chosen = select_molecules_I7(mol_df,
                                     atom_df,
                                     pair_df,
                                     prev_chosen=chosen,
                                     num=num)
        train_graphs, train_mol_df, test_graphs, test_mol_df = get_split(
            chosen, mol_df, graphs)

        # Use a real message: `assert cond, print(...)` evaluates to a
        # message of None because print() returns None.
        assert len(train_graphs) == num * counter, (
            "got {} train graphs with num={} and counter={}".format(
                len(train_graphs), num, counter))
        assert len(test_graphs) == total - (num * counter)
        # No molecule may appear twice within either side of the split.
        assert len(train_mol_df.molecule_name.unique()) == len(
            train_mol_df.molecule_name)
        assert len(test_mol_df.molecule_name.unique()) == len(
            test_mol_df.molecule_name)
        assert counter < total / num

        # Train and test sets must be disjoint.
        test_names = test_mol_df.molecule_name.unique()
        for molname in train_mol_df.molecule_name.unique():
            assert molname not in test_names
    assert counter == 2
    # Tie the remainder to `num` instead of repeating the literal batch size.
    assert len(chosen) == len(graphs) - len(graphs) % num