import copy
import io
import os
from contextlib import redirect_stdout
from os import path

from edit_distance.models.feedforward.model import MLPEncoder
from edit_distance.models.recurrent.model import GRU
from edit_distance.models.transformer.model import Transformer
from edit_distance.train import execute_train
# Import path for CNN assumed by analogy with the other model modules:
from edit_distance.models.cnn.model import CNN


def test_gru_model_output(self):
    folder_name, dataset_name, args = generate_dataset_and_parser()
    execute_train(model_class=GRU,
                  model_args=dict(recurrent_layers=1, readout_layers=1, hidden_size=5),
                  args=args)
    assert path.exists('GRU.pkl')
    assert path.exists('0.pkl') or path.exists('1.pkl')
    os.remove('GRU.pkl')
    remove_files(folder_name, dataset_name)
def test_mlp_model_output(self):
    folder_name, dataset_name, args = generate_dataset_and_parser()
    execute_train(model_class=MLPEncoder,
                  model_args=dict(layers=2, hidden_size=5, batch_norm=True),
                  args=args)
    assert path.exists('MLPEncoder.pkl')
    assert path.exists('0.pkl') or path.exists('1.pkl')
    os.remove('MLPEncoder.pkl')
    remove_files(folder_name, dataset_name)
def test_hyperbolic_distance_output(self):
    folder_name, dataset_name, args = generate_dataset_and_parser()
    args = copy.copy(args)
    args.distance = 'hyperbolic'
    args.scaling = True
    execute_train(model_class=MLPEncoder,
                  model_args=dict(layers=2, hidden_size=5, batch_norm=True),
                  args=args)
    assert path.exists('MLPEncoder.pkl')
    assert path.exists('0.pkl') or path.exists('1.pkl')
    os.remove('MLPEncoder.pkl')
    remove_files(folder_name, dataset_name)
def test_transformer_model_output(self):
    folder_name, dataset_name, args = generate_dataset_and_parser()
    execute_train(model_class=Transformer,
                  model_args=dict(segment_size=2, trans_layers=1, readout_layers=1,
                                  hidden_size=4, mask='empty', heads=2, layer_norm=True),
                  args=args)
    assert path.exists('Transformer.pkl')
    assert path.exists('0.pkl') or path.exists('1.pkl')
    os.remove('Transformer.pkl')
    remove_files(folder_name, dataset_name)
def test_cnn_model_output(self):
    folder_name, dataset_name, args = generate_dataset_and_parser()
    execute_train(model_class=CNN,
                  model_args=dict(readout_layers=1, channels=4, layers=2, kernel_size=3,
                                  pooling='avg', non_linearity=True, batch_norm=True, stride=1),
                  args=args)
    assert path.exists('CNN.pkl')
    assert path.exists('0.pkl') or path.exists('1.pkl')
    os.remove('CNN.pkl')
    remove_files(folder_name, dataset_name)
def test_sequence_output(self):
    folder_name, edit_dataset_name, args = generate_dataset_and_parser()
    args = copy.copy(args)
    args.distance = 'cosine'

    # run method storing output
    execute_train(model_class=MLPEncoder,
                  model_args=dict(layers=2, hidden_size=5, batch_norm=True),
                  args=args)

    # check output tree
    assert path.exists('sequences.fasta'), "Sequences file missing"

    # remove files
    remove_files(folder_name, edit_dataset_name)
def test_euclidean_distance_stdout(self):
    folder_name, edit_dataset_name, closest_dataset_name, args = generate_dataset_and_parser()
    args = copy.copy(args)
    args.distance = 'euclidean'

    # run method storing output
    f = io.StringIO()
    with redirect_stdout(f):
        execute_train(model_class=MLPEncoder,
                      model_args=dict(layers=2, hidden_size=5, batch_norm=True),
                      args=args)
    out = f.getvalue()

    # check correct output
    assert 'Top1:' in out and 'Top5:' in out and 'Top10:' in out, \
        'Wrong output format for euclidean distance'

    # remove files
    remove_files(folder_name, edit_dataset_name, closest_dataset_name)
def test_cosine_distance_stdout(self):
    folder_name, edit_dataset_name, hc_dataset_name, args = generate_dataset_and_parser()
    args = copy.copy(args)
    args.distance = 'cosine'

    # run method storing output
    f = io.StringIO()
    with redirect_stdout(f):
        execute_train(model_class=MLPEncoder,
                      model_args=dict(layers=2, hidden_size=5, batch_norm=True),
                      args=args)
    out = f.getvalue()

    # check correct output
    assert "'single': {'DC'" in out and "'complete': {'DC'" in out and "'average': {'DC'" in out, \
        'Wrong output format for cosine distance'

    # remove files
    remove_files(folder_name, edit_dataset_name, hc_dataset_name)
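# The tests above rely on two helpers defined elsewhere in the test module.
# The stubs below are a hypothetical sketch of their contract only; the real
# implementations are not shown in this section.
def generate_dataset_and_parser():
    """Create a small synthetic dataset on disk and return the generated
    folder/dataset names plus a parsed-args namespace for execute_train.
    (Some test files expect three return values, others four.)"""
    raise NotImplementedError


def remove_files(*names):
    """Delete the temporary dataset folders/files created for a test."""
    raise NotImplementedError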
from edit_distance.models.feedforward.model import MLPEncoder
from edit_distance.train import execute_train, general_arg_parser
from util.data_handling.data_loader import BOOL_CHOICE

parser = general_arg_parser()
parser.add_argument('--layers', type=int, default=1, help='Number of fully connected layers')
parser.add_argument('--hidden_size', type=int, default=100, help='Size of hidden layers')
parser.add_argument('--batch_norm', type=str, default='False', help='Batch normalization')
args = parser.parse_args()

assert args.batch_norm in BOOL_CHOICE, "Boolean values have to be either 'True' or 'False'"

execute_train(model_class=MLPEncoder,
              model_args=dict(layers=args.layers,
                              hidden_size=args.hidden_size,
                              batch_norm=args.batch_norm == 'True'),
              args=args)
from edit_distance.models.transformer.model import Transformer
from edit_distance.train import execute_train, general_arg_parser
from util.data_handling.data_loader import BOOL_CHOICE

parser = general_arg_parser()
parser.add_argument('--trans_layers', type=int, default=1, help='Number of attention layers')
parser.add_argument('--readout_layers', type=int, default=1, help='Number of final fully connected layers')
parser.add_argument('--hidden_size', type=int, default=16, help='Size of hidden tokens')
parser.add_argument('--mask', type=str, default='empty', help='Whether to apply a mask (empty or local{N})')
parser.add_argument('--heads', type=int, default=1, help='Number of attention heads at each layer')
parser.add_argument('--layer_norm', type=str, default='True', help='Layer normalization')
parser.add_argument('--segment_size', type=int, default=5, help='Number of elements for each string token')
args = parser.parse_args()

assert args.layer_norm in BOOL_CHOICE, "Boolean values have to be either 'True' or 'False'"

execute_train(model_class=Transformer,
              model_args=dict(segment_size=args.segment_size,
                              trans_layers=args.trans_layers,
                              readout_layers=args.readout_layers,
                              hidden_size=args.hidden_size,
                              mask=args.mask,
                              heads=args.heads,
                              layer_norm=args.layer_norm == 'True'),
              args=args)
parser.add_argument('--kernel_size', type=int, default=3, help='Kernel size in convolutions')
parser.add_argument('--pooling', type=str, default='avg', help='Pooling type (avg, max or none)')
parser.add_argument('--non_linearity', type=str, default='True',
                    help='Whether to apply non-linearity to convolutions')
parser.add_argument('--batch_norm', type=str, default='True', help='Batch normalization')
args = parser.parse_args()

assert args.non_linearity in BOOL_CHOICE and args.batch_norm in BOOL_CHOICE, \
    "Boolean values have to be either 'True' or 'False'"

execute_train(model_class=CNN,
              model_args=dict(readout_layers=args.readout_layers,
                              channels=args.channels,
                              layers=args.layers,
                              kernel_size=args.kernel_size,
                              pooling=args.pooling,
                              non_linearity=args.non_linearity == 'True',
                              batch_norm=args.batch_norm == 'True',
                              stride=args.stride),
              args=args)
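# The 'True'/'False' string-to-bool conversion recurs in several launchers.
# A small helper like the following (hypothetical, not part of the repository)
# would keep the BOOL_CHOICE check and the conversion in one place:
def str_to_bool(value: str) -> bool:
    """Map the CLI strings 'True'/'False' to booleans, rejecting anything else."""
    if value not in ('True', 'False'):
        raise ValueError("Boolean values have to be either 'True' or 'False'")
    return value == 'True'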
from edit_distance.models.kmer.model import Kmer
from edit_distance.train import execute_train, general_arg_parser

parser = general_arg_parser()
parser.add_argument('--k', type=int, default=4, help='Size of k-mers')
args = parser.parse_args()

execute_train(model_class=Kmer, model_args=dict(k=args.k), args=args)
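# For a quick smoke test, the same entry point can also be driven
# programmatically; a minimal sketch, assuming the defaults supplied by
# general_arg_parser are sufficient for a short run:
from edit_distance.models.kmer.model import Kmer
from edit_distance.train import execute_train, general_arg_parser

args = general_arg_parser().parse_args([])  # parse with defaults only, no CLI input
execute_train(model_class=Kmer, model_args=dict(k=4), args=args)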
from edit_distance.models.recurrent.model import GRU
from edit_distance.train import execute_train, general_arg_parser

parser = general_arg_parser()
parser.add_argument('--recurrent_layers', type=int, default=1, help='Number of recurrent layers')
parser.add_argument('--readout_layers', type=int, default=2, help='Number of readout layers')
parser.add_argument('--hidden_size', type=int, default=100, help='Size of hidden dimension')
args = parser.parse_args()

execute_train(model_class=GRU,
              model_args=dict(recurrent_layers=args.recurrent_layers,
                              readout_layers=args.readout_layers,
                              hidden_size=args.hidden_size),
              args=args)