Example #1
    def test_train(self):
        """
        Perform training on a very simple dataset consisting of only a single example.
        """
        data = ['aa']
        encoder = input.InputEncoder(data)
        net = model.Model(len(encoder), hidden_size=10)
        objective = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=0.1)
        max_loss = min_loss = None

        for _ in range(100):
            net.zero_grad()
            packed_seq, true_indices = input.batch_to_train(data, encoder)
            out, _ = net(packed_seq)
            v_loss = objective(out, true_indices)
            v_loss.backward()
            optimizer.step()
            loss = v_loss.item()  # .data[0] only worked on pre-0.4 PyTorch
            if max_loss is None:
                max_loss = min_loss = loss
            else:
                max_loss = max(max_loss, loss)
                min_loss = min(min_loss, loss)

        loss_drop = max_loss / min_loss
        self.assertGreater(loss_drop, 10.0)

        # try to predict letters
        out = model.inference_one(encoder.START_TOKEN, encoder, net)
        self.assertEqual(out, 'a')
        out = model.inference_one('a', encoder, net)
        self.assertEqual(out, 'a')
        out = model.inference_one('aa', encoder, net)
        self.assertEqual(out, encoder.END_TOKEN)
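model.inference_one is exercised above but not shown in this listing. A plausible greedy-decoding sketch, assuming the encoder API seen in Example #2 plus a hypothetical index_to_token inverse lookup (an assumed name, not the repo's):

import numpy as np
import torch
import torch.nn.utils.rnn as rnn_utils

def inference_one_sketch(prefix, encoder, net):
    # encode the prefix; width=len(prefix) means no padding for a single sequence
    indices = np.asarray(encoder.encode(prefix, width=len(prefix)))
    v = torch.from_numpy(indices).unsqueeze(0)  # batch of one, batch_first layout
    packed = rnn_utils.pack_padded_sequence(v, [len(prefix)], batch_first=True)
    out, _ = net(packed)                 # logits for every step, as in the tests
    best = out[-1].argmax().item()       # greedy pick at the final step
    return encoder.index_to_token(best)  # index_to_token is an assumed helper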
Example #2
    def test_simple(self):
        encoder = input.InputEncoder(['abcde'])
        items = ["abc", "ab"]
        lens = list(map(len, items))

        dat = np.array([
            encoder.encode(s, width=max(lens))
            for s in items
        ])

        v_dat = Variable(torch.from_numpy(dat))  # Variable is a no-op wrapper on PyTorch >= 0.4
        seq = rnn_utils.pack_padded_sequence(v_dat, lens, batch_first=True)

        m = model.Model(len(encoder), 16)
        res = m(seq)  # call the module, not .forward(), so hooks run
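For context, pack_padded_sequence is plain PyTorch: it flattens a right-padded, length-sorted batch into a time-major buffer plus per-step batch sizes. A self-contained demo with no project code:

import torch
import torch.nn.utils.rnn as rnn_utils

# Two right-padded sequences of lengths 3 and 2, sorted longest first
# (required here; on PyTorch >= 1.1 you can pass enforce_sorted=False instead).
padded = torch.tensor([[1, 2, 3],
                       [4, 5, 0]])
packed = rnn_utils.pack_padded_sequence(padded, [3, 2], batch_first=True)
print(packed.data)         # tensor([1, 4, 2, 5, 3]) -- time-major interleave
print(packed.batch_sizes)  # tensor([2, 2, 1]) -- active sequences per step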
Example #3
def run(config):
    """Entry point to run training."""
    init_data_normalizer(config)

    stage_ids = train_util.get_stage_ids(**config)
    if not config['train_progressive']:
        stage_ids = list(stage_ids)[-1:]

    # Train one stage at a time
    for stage_id in stage_ids:
        batch_size = train_util.get_batch_size(stage_id, **config)
        tf.reset_default_graph()
        with tf.device(tf.train.replica_device_setter(config['ps_tasks'])):
            model = lib_model.Model(stage_id, batch_size, config)
            model.add_summaries(stage_id)
            print('Variables:')
            for v in tf.global_variables():
                print('\t', v.name, v.get_shape().as_list())
            logging.info('Calling train.train')
            train_util.train(model, **config)
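The graph-reset and device-placement calls above are standard TF 1.x. A minimal standalone illustration of the same pattern (with ps_tasks=0, which keeps every variable on the local worker rather than sharding it across parameter servers):

import tensorflow as tf  # TF 1.x API

tf.reset_default_graph()  # start each stage from an empty default graph
with tf.device(tf.train.replica_device_setter(ps_tasks=0)):
    w = tf.get_variable('w', shape=[4, 4])  # sharded across ps tasks if ps_tasks > 0

for v in tf.global_variables():
    print(v.name, v.get_shape().as_list())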
Example #4
    def test_search(self):
        m = model.Model(data={
            "hello": {
                "id": 1,
                "a": "aaa"
            },
            "no": {
                "id": 2,
                "a": "bba"
            }
        })
        self.assertEqual(m.search("", field_key="a"),
                         {})  # test that an empty string doesn't return everything
        self.assertEqual(m.search("aa", field_key="a"),
                         {1: {
                             "id": 1,
                             "a": "aaa"
                         }})  # test that search finds the matching item
        self.assertEqual(m.search("x", field_key="a"),
                         {})  # test that a non-matching phrase returns an empty result
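The project's Model.search is not shown in this listing; the sketch below is a hypothetical reimplementation inferred only from the assertions above (substring match over one field, results keyed by each item's "id", empty query matches nothing):

def search_sketch(data, query, field_key):
    """Substring match over one field; results keyed by each item's "id".
    Inferred from the test: an empty query matches nothing."""
    if not query:
        return {}
    return {
        item["id"]: item
        for item in data.values()
        if query in item.get(field_key, "")
    }

items = {"hello": {"id": 1, "a": "aaa"}, "no": {"id": 2, "a": "bba"}}
assert search_sketch(items, "aa", field_key="a") == {1: {"id": 1, "a": "aaa"}}
assert search_sketch(items, "", field_key="a") == {}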
Example #5
    def test_generate_batch_2(self):
        data = ['aa', 'b']
        encoder = input.InputEncoder(data)
        net = model.Model(len(encoder), hidden_size=10)
        objective = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=0.1)
        max_loss = min_loss = None

        for _ in range(1000):
            net.zero_grad()
            packed_seq, true_indices = input.batch_to_train(data, encoder)
            out, _ = net(packed_seq)
            v_loss = objective(out, true_indices)
            v_loss.backward()
            optimizer.step()
            loss = v_loss.item()  # .data[0] only worked on pre-0.4 PyTorch
            if max_loss is None:
                max_loss = min_loss = loss
            else:
                max_loss = max(max_loss, loss)
                min_loss = min(min_loss, loss)

        res = [model.generate_name(net, encoder) for _ in range(10)]
        print(res)
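model.generate_name is likewise not shown here. A plausible sampling loop; net.step, encoder.token_to_index, and encoder.index_to_token are assumed names for a hypothetical one-step API, not the repo's:

import torch

def generate_name_sketch(net, encoder, max_len=20):
    """Hypothetical sampling loop: feed tokens one at a time and sample the
    next from the softmax until END_TOKEN appears or max_len is reached."""
    token, result = encoder.START_TOKEN, ""
    hidden = None
    for _ in range(max_len):
        idx = torch.tensor([encoder.token_to_index(token)])  # assumed lookup
        logits, hidden = net.step(idx, hidden)               # assumed one-step API
        probs = torch.softmax(logits.squeeze(), dim=0)
        token = encoder.index_to_token(torch.multinomial(probs, 1).item())
        if token == encoder.END_TOKEN:
            break
        result += token
    return result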
Example #6
    logging.basicConfig(
        format="%(asctime)-15s %(levelname)s %(name)-14s %(message)s",
        level=logging.INFO)
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda",
                        default=False,
                        action='store_true',
                        help="Enable cuda")
    args = parser.parse_args()

    data = input.read_data()
    input_encoder = input.InputEncoder(data)

    log.info("Read %d train samples, encoder len=%d, first 10: %s", len(data),
             len(input_encoder), ', '.join(data[:10]))

    net = model.Model(len(input_encoder), hidden_size=HIDDEN_SIZE)
    if args.cuda:
        net = net.cuda()
    objective = nn.CrossEntropyLoss()

    lr = 0.004
    # decay the LR by this factor every 100 epochs
    lr_decay = 0.9
    optimizer = optim.RMSprop(net.parameters(), lr=lr)

    for epoch in range(EPOCHES):
        losses = []
        start_ts = time.time()

        for batch in input.iterate_batches(data, BATCH_SIZE):
            net.zero_grad()
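The snippet is cut off before lr_decay is applied; a conventional way to decay the learning rate in place every 100 epochs (standard PyTorch, independent of this repo):

import torch
import torch.optim as optim

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = optim.RMSprop(params, lr=0.004)
lr_decay = 0.9

for epoch in range(300):
    # ... training step would go here ...
    if epoch > 0 and epoch % 100 == 0:          # every 100 epochs
        for group in optimizer.param_groups:    # decay the LR in place
            group['lr'] *= lr_decay

print(optimizer.param_groups[0]['lr'])  # 0.004 * 0.9**2 ~= 0.00324

torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.9) achieves the same thing via scheduler.step() called once per epoch.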
Example #7
            id = len(nodes)  # note: shadows the id() builtin

            # ids of the partners that interact with this node
            conds = [x.id1 for x in interactions if x.id2 == id]
            nodes.append(node.Node(id, chain, ndx, conds))

            if id not in paired:
                unpaired.append(id)

    # save a definition file and set the definition input to that file
    model = model.Model(name,  # rebinds 'model', shadowing the imported module
                        version,
                        ref_seqs,
                        data_sources,
                        nodes,
                        None,
                        pairing,
                        unpaired,
                        None,
                        sep_min=4,
                        sep_max=0,
                        symmetric=symmetric)
    model_parser.ModelParser().write(def_file, model)

# open the definition file
def_model = model_parser.ModelParser().parse(def_file)

check_def_model(def_file, def_model)

data = []

# for each source file
Example #8
    def test_object_creation(self):
        m = model.Model(data={"hello": "world"})
        self.assertIsInstance(m, model.Model)
        self.assertEqual(m._data, {"hello": "world"})