Example #1
import random
import string

# IndependentGenerator, ALPHABET_SIZE, generate_random_dna,
# EditDistanceGenomicDatasetGenerator and general_arg_parser are
# provided by the surrounding project.


def generate_dataset_and_parser():
    # Random folder name for the temporary test artifacts.
    folder_name = ''.join(random.choice(string.ascii_lowercase) for _ in range(10))
    generator = IndependentGenerator(alphabet_size=ALPHABET_SIZE, seed=0)  # unused here; kept for parity with the other fixtures
    edit_dataset_name = folder_name + '/test_ed.pkl'

    # 20 random DNA strings: one of length 50 plus 19 of length 10-50,
    # split 10/5/5 into train/val/test.
    strings = [generate_random_dna(50)] + [generate_random_dna(random.randint(10, 50)) for _ in range(19)]
    strings_dict = {'train': strings[:10], 'val': strings[10:15], 'test': strings[15:]}
    edit_dataset = EditDistanceGenomicDatasetGenerator(strings=strings_dict)
    edit_dataset.save_as_pickle(edit_dataset_name)

    parser = general_arg_parser()
    args = parser.parse_args()
    args.data = edit_dataset_name
    args.epochs = 2
    args.print_every = 1
    args.construct_msa_tree = 'True'
    return folder_name, edit_dataset_name, args
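
For context, a minimal sketch of how a fixture like this is typically consumed. execute_train and MLPEncoder appear in Example #4 and the model_args values mirror that example's defaults, while the try/finally cleanup with shutil.rmtree is an assumption added here, not part of the original.

import shutil

from edit_distance.models.feedforward.model import MLPEncoder
from edit_distance.train import execute_train

folder_name, edit_dataset_name, args = generate_dataset_and_parser()
try:
    # Hypothetical smoke test: train the small MLP from Example #4 on the generated data.
    execute_train(model_class=MLPEncoder,
                  model_args=dict(layers=1, hidden_size=100, batch_norm=False),
                  args=args)
finally:
    shutil.rmtree(folder_name, ignore_errors=True)  # delete the temporary dataset folder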
Example #2
import random
import string

# IndependentGenerator, ALPHABET_SIZE, EditDistanceDatasetGenerator,
# HierarchicalClusteringDatasetGenerator and general_arg_parser are
# provided by the surrounding project.


def generate_dataset_and_parser():
    folder_name = ''.join(random.choice(string.ascii_lowercase) for _ in range(10))
    generator = IndependentGenerator(alphabet_size=ALPHABET_SIZE, seed=0)

    # Small synthetic edit-distance dataset: 2 batches per split.
    edit_dataset_name = folder_name + '/test_ed.pkl'
    edit_dataset = EditDistanceDatasetGenerator(
        N_batches={"train": 2, "val": 2, "test": 2},
        batch_size={"train": 5, "val": 3, "test": 3},
        len_sequence={"train": 10, "val": 10, "test": 10},
        max_changes={"train": 2, "val": 2, "test": 2},
        string_generator=generator, seed=0)
    edit_dataset.save_as_pickle(edit_dataset_name)

    # Companion hierarchical-clustering dataset built from the same generator.
    hc_dataset_name = folder_name + '/test_hc.pkl'
    hc_dataset = HierarchicalClusteringDatasetGenerator(
        N_reference=3, N_leaves=4, len_sequence=10,
        min_changes=2, max_changes=4,
        string_generator=generator, seed=0)
    hc_dataset.save_as_pickle(hc_dataset_name)

    parser = general_arg_parser()
    args = parser.parse_args()
    args.data = edit_dataset_name
    args.epochs = 2
    args.print_every = 1
    args.hierarchical_data_path = hc_dataset_name
    return folder_name, edit_dataset_name, hc_dataset_name, args
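
A hedged sanity check for the two files this fixture writes, assuming save_as_pickle produces a plain pickle (the project may provide its own loader instead):

import pickle

folder_name, edit_dataset_name, hc_dataset_name, args = generate_dataset_and_parser()
for path in (edit_dataset_name, hc_dataset_name):
    with open(path, 'rb') as f:
        payload = pickle.load(f)  # assumption: plain-pickle round trip
    print(path, type(payload))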
Example #3
import random
import string

# IndependentGenerator, ALPHABET_SIZE, EditDistanceDatasetGenerator and
# general_arg_parser are provided by the surrounding project.


def generate_dataset_and_parser():
    folder_name = ''.join(random.choice(string.ascii_lowercase) for _ in range(10))
    generator = IndependentGenerator(alphabet_size=ALPHABET_SIZE, seed=0)
    dataset_name = folder_name + '/test_ed_model.pkl'
    dataset = EditDistanceDatasetGenerator(
        N_batches={"train": 2, "val": 2, "test": 2},
        batch_size={"train": 5, "val": 3, "test": 3},
        len_sequence={"train": 10, "val": 10, "test": 10},
        max_changes={"train": 2, "val": 2, "test": 2},
        string_generator=generator, seed=0)
    dataset.save_as_pickle(dataset_name)

    parser = general_arg_parser()
    args = parser.parse_args()
    args.data = dataset_name
    args.epochs = 2
    args.print_every = 1
    args.distance = "euclidean"  # embed sequences into Euclidean space
    return folder_name, dataset_name, args
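
As context for the distance flag, a minimal illustration of the Euclidean distance it selects, computed between two embedding vectors in plain PyTorch; this is our own sketch, not the project's implementation:

import torch

x = torch.tensor([0.0, 1.0, 2.0])
y = torch.tensor([1.0, 1.0, 0.0])
d = torch.norm(x - y, p=2)  # Euclidean distance between the two embeddings
print(d.item())  # ~2.2361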
Example #4
from edit_distance.models.feedforward.model import MLPEncoder
from edit_distance.train import execute_train, general_arg_parser
from util.data_handling.data_loader import BOOL_CHOICE

# Extend the shared parser with MLP-specific hyperparameters.
parser = general_arg_parser()
parser.add_argument('--layers', type=int, default=1, help='Number of fully connected layers')
parser.add_argument('--hidden_size', type=int, default=100, help='Size of hidden layers')
parser.add_argument('--batch_norm', type=str, default='False', help="Whether to use batch normalization ('True'/'False')")

args = parser.parse_args()

assert args.batch_norm in BOOL_CHOICE, "Boolean values have to be either 'True' or 'False'"

execute_train(model_class=MLPEncoder,
              model_args=dict(layers=args.layers,
                              hidden_size=args.hidden_size,
                              batch_norm=args.batch_norm == 'True'),  # string flag -> bool
              args=args)
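
Finally, a hedged sketch of the pieces this script relies on: the likely shape of BOOL_CHOICE and a typical command line. Both are assumptions, not confirmed by the example.

# Presumably defined in util/data_handling/data_loader.py (assumption):
BOOL_CHOICE = ('True', 'False')

# Hypothetical invocation; flags beyond those defined above come from general_arg_parser:
#   python train.py --data test_ed.pkl --layers 2 --hidden_size 128 --batch_norm True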