Beispiel #1
0
    def test_gru_model_output(self):
        """Train a tiny GRU end-to-end and check the expected pickles appear."""
        folder, dataset, parsed_args = generate_dataset_and_parser()

        gru_args = dict(recurrent_layers=1, readout_layers=1, hidden_size=5)
        execute_train(model_class=GRU, model_args=gru_args, args=parsed_args)

        # training must dump the model plus at least one epoch checkpoint
        assert path.exists('GRU.pkl')
        assert path.exists('0.pkl') or path.exists('1.pkl')

        # clean up generated artifacts
        os.remove('GRU.pkl')
        remove_files(folder, dataset)
Beispiel #2
0
    def test_mlp_model_output(self):
        """Train a small MLP encoder and check the expected pickles appear."""
        folder, dataset, parsed_args = generate_dataset_and_parser()

        mlp_args = dict(layers=2, hidden_size=5, batch_norm=True)
        execute_train(model_class=MLPEncoder, model_args=mlp_args, args=parsed_args)

        # training must dump the model plus at least one epoch checkpoint
        assert path.exists('MLPEncoder.pkl')
        assert path.exists('0.pkl') or path.exists('1.pkl')

        # clean up generated artifacts
        os.remove('MLPEncoder.pkl')
        remove_files(folder, dataset)
Beispiel #3
0
    def test_hyperbolic_distance_output(self):
        """Train an MLP encoder with the hyperbolic distance and check outputs."""
        folder, dataset, base_args = generate_dataset_and_parser()

        # copy so the shared namespace is not mutated for other tests
        run_args = copy.copy(base_args)
        run_args.distance = 'hyperbolic'
        run_args.scaling = True

        execute_train(model_class=MLPEncoder,
                      model_args=dict(layers=2, hidden_size=5, batch_norm=True),
                      args=run_args)

        # training must dump the model plus at least one epoch checkpoint
        assert path.exists('MLPEncoder.pkl')
        assert path.exists('0.pkl') or path.exists('1.pkl')

        # clean up generated artifacts
        os.remove('MLPEncoder.pkl')
        remove_files(folder, dataset)
Beispiel #4
0
    def test_transformer_model_output(self):
        """Train a tiny Transformer and check the expected pickles appear."""
        folder, dataset, parsed_args = generate_dataset_and_parser()

        transformer_args = dict(segment_size=2,
                                trans_layers=1,
                                readout_layers=1,
                                hidden_size=4,
                                mask='empty',
                                heads=2,
                                layer_norm=True)
        execute_train(model_class=Transformer,
                      model_args=transformer_args,
                      args=parsed_args)

        # training must dump the model plus at least one epoch checkpoint
        assert path.exists('Transformer.pkl')
        assert path.exists('0.pkl') or path.exists('1.pkl')

        # clean up generated artifacts
        os.remove('Transformer.pkl')
        remove_files(folder, dataset)
Beispiel #5
0
    def test_cnn_model_output(self):
        """Train a small CNN and check the expected pickles appear."""
        folder, dataset, parsed_args = generate_dataset_and_parser()

        cnn_args = dict(readout_layers=1,
                        channels=4,
                        layers=2,
                        kernel_size=3,
                        pooling='avg',
                        non_linearity=True,
                        batch_norm=True,
                        stride=1)
        execute_train(model_class=CNN, model_args=cnn_args, args=parsed_args)

        # training must dump the model plus at least one epoch checkpoint
        assert path.exists('CNN.pkl')
        assert path.exists('0.pkl') or path.exists('1.pkl')

        # clean up generated artifacts
        os.remove('CNN.pkl')
        remove_files(folder, dataset)
Beispiel #6
0
    def test_sequence_output(self):
        """Cosine-distance training should write a FASTA file of sequences."""
        folder, edit_ds, base_args = generate_dataset_and_parser()

        # copy so the shared namespace is not mutated for other tests
        run_args = copy.copy(base_args)
        run_args.distance = 'cosine'

        # run method storing output
        execute_train(model_class=MLPEncoder,
                      model_args=dict(layers=2, hidden_size=5, batch_norm=True),
                      args=run_args)

        # check output tree
        assert path.exists('sequences.fasta'), "Sequences file missing"

        # remove files
        remove_files(folder, edit_ds)
Beispiel #7
0
    def test_euclidean_distance_stdout(self):
        """Euclidean-distance training should print Top1/Top5/Top10 scores."""
        folder, edit_ds, closest_ds, base_args = generate_dataset_and_parser()

        # copy so the shared namespace is not mutated for other tests
        run_args = copy.copy(base_args)
        run_args.distance = 'euclidean'

        # capture everything the training loop prints to stdout
        buffer = io.StringIO()
        with redirect_stdout(buffer):
            execute_train(model_class=MLPEncoder,
                          model_args=dict(layers=2, hidden_size=5, batch_norm=True),
                          args=run_args)
        printed = buffer.getvalue()

        # check correct output
        assert 'Top1:' in printed and 'Top5:' in printed and 'Top10:' in printed, \
            'Wrong output format for euclidean distance'

        # remove files
        remove_files(folder, edit_ds, closest_ds)
Beispiel #8
0
    def test_cosine_distance_stdout(self):
        """Cosine-distance training should print hierarchical-clustering results."""
        folder, edit_ds, hc_ds, base_args = generate_dataset_and_parser()

        # copy so the shared namespace is not mutated for other tests
        run_args = copy.copy(base_args)
        run_args.distance = 'cosine'

        # capture everything the training loop prints to stdout
        buffer = io.StringIO()
        with redirect_stdout(buffer):
            execute_train(model_class=MLPEncoder,
                          model_args=dict(layers=2, hidden_size=5, batch_norm=True),
                          args=run_args)
        printed = buffer.getvalue()

        # each linkage method must report a 'DC' entry
        expected = ("'single': {'DC'", "'complete': {'DC'", "'average': {'DC'")
        assert all(fragment in printed for fragment in expected), \
            'Wrong output format for cosine distance'

        # remove files
        remove_files(folder, edit_ds, hc_ds)
Beispiel #9
0
from edit_distance.models.feedforward.model import MLPEncoder
from edit_distance.train import execute_train, general_arg_parser
from util.data_handling.data_loader import BOOL_CHOICE

# CLI entry point: train an MLP encoder for edit-distance embedding.
parser = general_arg_parser()
parser.add_argument('--layers', type=int, default=1, help='Number of fully connected layers')
parser.add_argument('--hidden_size', type=int, default=100, help='Size of hidden layers')
parser.add_argument('--batch_norm', type=str, default='False', help='Batch normalization')

args = parser.parse_args()

# Boolean flags arrive as strings; only 'True'/'False' are accepted.
assert args.batch_norm in BOOL_CHOICE, "Boolean values have to be either 'True' or 'False' "

execute_train(model_class=MLPEncoder,
              model_args=dict(layers=args.layers,
                              hidden_size=args.hidden_size,
                              # comparison already yields a bool; the former
                              # `True if ... else False` ternary was redundant
                              batch_norm=args.batch_norm == 'True'),
              args=args)
Beispiel #10
0
from edit_distance.models.transformer.model import Transformer
from edit_distance.train import execute_train, general_arg_parser
from util.data_handling.data_loader import BOOL_CHOICE

# CLI entry point: train a Transformer encoder for edit-distance embedding.
parser = general_arg_parser()
parser.add_argument('--trans_layers', type=int, default=1, help='Number of attention layers')
parser.add_argument('--readout_layers', type=int, default=1, help='Number of final fully connected layers')
parser.add_argument('--hidden_size', type=int, default=16, help='Size of hidden tokens')
parser.add_argument('--mask', type=str, default="empty", help='Whether to apply a mask (empty or local{N})')
parser.add_argument('--heads', type=int, default=1, help='Number of attention heads at each layer')
parser.add_argument('--layer_norm', type=str, default='True', help='Layer normalization')
parser.add_argument('--segment_size', type=int, default=5, help='Number of elements for each string token')

args = parser.parse_args()

# Boolean flags arrive as strings; only 'True'/'False' are accepted.
assert args.layer_norm in BOOL_CHOICE, "Boolean values have to be either 'True' or 'False' "

execute_train(model_class=Transformer,
              model_args=dict(segment_size=args.segment_size,
                              trans_layers=args.trans_layers,
                              readout_layers=args.readout_layers,
                              hidden_size=args.hidden_size,
                              mask=args.mask,
                              heads=args.heads,
                              # comparison already yields a bool; the former
                              # `True if ... else False` ternary was redundant
                              layer_norm=args.layer_norm == 'True'),
              args=args)
Beispiel #11
0
                    default=3,
                    help='Kernel size in convolutions')
parser.add_argument('--pooling',
                    type=str,
                    default='avg',
                    # fixed: help text had an unbalanced '(' in the original
                    help='Pooling type (avg, max or none)')
parser.add_argument('--non_linearity',
                    type=str,
                    default='True',
                    help='Whether to apply non-linearity to convolutions')
parser.add_argument('--batch_norm',
                    type=str,
                    default='True',
                    help='Batch normalization')
args = parser.parse_args()

# Boolean flags arrive as strings; only 'True'/'False' are accepted.
assert args.non_linearity in BOOL_CHOICE and args.batch_norm in BOOL_CHOICE, \
    "Boolean values have to be either 'True' or 'False' "

execute_train(
    model_class=CNN,
    model_args=dict(
        readout_layers=args.readout_layers,
        channels=args.channels,
        layers=args.layers,
        kernel_size=args.kernel_size,
        pooling=args.pooling,
        # comparisons already yield bools; the former
        # `True if ... else False` ternaries were redundant
        non_linearity=args.non_linearity == 'True',
        batch_norm=args.batch_norm == 'True',
        stride=args.stride),
    args=args)
Beispiel #12
0
from edit_distance.models.kmer.model import Kmer
from edit_distance.train import execute_train, general_arg_parser

# CLI entry point: train a k-mer baseline; only the kernel size is configurable.
parser = general_arg_parser()
parser.add_argument('--k', type=int, default=4, help='Size of kernel')
args = parser.parse_args()

execute_train(model_class=Kmer, model_args={'k': args.k}, args=args)
Beispiel #13
0
from edit_distance.models.recurrent.model import GRU
from edit_distance.train import execute_train, general_arg_parser

# CLI entry point: train the recurrent (GRU) edit-distance encoder.
parser = general_arg_parser()
parser.add_argument('--recurrent_layers', type=int, default=1,
                    help='Number of recurrent layers')
parser.add_argument('--readout_layers', type=int, default=2,
                    help='Number of readout layers')
parser.add_argument('--hidden_size', type=int, default=100,
                    help='Size of hidden dimension')
args = parser.parse_args()

execute_train(model_class=GRU,
              model_args={'recurrent_layers': args.recurrent_layers,
                          'readout_layers': args.readout_layers,
                          'hidden_size': args.hidden_size},
              args=args)