Example #1
def make_model():
    """Build a convolutional encoder-decoder over the WritingPrompts dictionaries."""
    source_dictionary = Dictionary.load(
        "data/data-bin/writingPrompts/dict.wp_source.txt")
    target_dictionary = Dictionary.load(
        "data/data-bin/writingPrompts/dict.wp_target.txt")

    encoder = ConvEncoder(source_dictionary,
                          embed_dim=256,
                          convolutions=[(128, 3)] * 2 + [(512, 3)] * 1,
                          dropout=0.1,
                          max_positions=1500,
                          attention=True,
                          attention_nheads=1)

    decoder = ConvDecoder(
        target_dictionary,
        embed_dim=256,
        convolutions=[(512, 4)] * 4 + [(768, 4)] * 2 + [(1024, 4)] * 1,
        out_embed_dim=256,
        attention=True,
        dropout=0.1,
        max_positions=1500,
        selfattention=True,
        attention_nheads=1,
        selfattention_nheads=4,
        project_input=True,
        gated_attention=True,
        downsample=True,
    )

    model = models.BaseModel(encoder, decoder)
    return model
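
A minimal call-site sketch, assuming the composed encoder/decoder behaves like a standard torch.nn.Module (hypothetical usage, not part of the original snippet):

model = make_model()
total_params = sum(p.numel() for p in model.parameters())
print("parameters: %d" % total_params)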
Example #2
def load_all_model(root_dir, device=0):
    """Load all trained ensemble members (17 classes) from root_dir onto a GPU."""
    model_corpus = []
    for i in range(17):
        config_file = os.path.join(root_dir, str(i), "config.json")
        with open(config_file, 'r') as fin:
            config = json.load(fin)
        args = argparse.Namespace(**config)
        item = []
        for j in range(args.model_num):
            if args.model_type == 'lstm':
                model = models.LSTMModel(args)
            elif args.model_type == 'conv':
                model = models.ConvModel(args)
            elif args.model_type == 'char':
                model = models.CharCNNModel(args)
            elif args.model_type == 'base':
                model = models.BaseModel(args)
            else:
                raise NotImplementedError
            model_path = os.path.join(
                args.checkpoint_path, str(i),
                "%s_%s" % (args.model_type, args.type_suffix),
                "model_%d.pth" % j)
            if not os.path.isfile(model_path):
                print("No model to test")
                exit(1)
            model.load_state_dict(torch.load(model_path))
            model = model.cuda(device)
            model.eval()
            item.append(model)
        model_corpus.append(item)
    return model_corpus
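
A hedged call-site sketch; the checkpoint directory below is hypothetical, and the nesting (17 classes, model_num members each) follows from the loops above:

model_corpus = load_all_model("checkpoints/run1", device=0)  # hypothetical path
print("%d classes, %d models for class 0" %
      (len(model_corpus), len(model_corpus[0])))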
Example #3
 def setUp(self):
     """Finds json file and renames it so we can make and destroy
     files without deleting data in use"""
     self.path = models.storage._FileStorage__file_path
     if os.path.exists(self.path):
         os.rename(self.path, 'noTouch')
     self.testcase = models.BaseModel()
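
A matching tearDown sketch, assuming the suite restores the file renamed in setUp once each test finishes (not shown in the original):

 def tearDown(self):
     """Put the original JSON storage file back in place."""
     if os.path.exists('noTouch'):
         os.rename('noTouch', self.path)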
Example #4
    def reload(self):
        '''Deserializes the JSON file into __objects.
        If the JSON file does not exist, does nothing.
        '''
        if os.path.isfile(FileStorage.__file_path):
            with open(FileStorage.__file_path, 'r+', encoding='utf-8') as fn:
                obj = json.load(fn)
                for key, val in obj.items():
                    cls_name = val['__class__']
                    if cls_name == 'BaseModel':
                        FileStorage.__objects[key] = models.BaseModel(**val)
                    elif cls_name == 'Amenity':
                        FileStorage.__objects[key] = models.Amenity(**val)
                    elif cls_name == 'City':
                        FileStorage.__objects[key] = models.City(**val)
                    elif cls_name == 'Place':
                        FileStorage.__objects[key] = models.Place(**val)
                    elif cls_name == 'Review':
                        FileStorage.__objects[key] = models.Review(**val)
                    elif cls_name == 'State':
                        FileStorage.__objects[key] = models.State(**val)
                    elif cls_name == 'User':
                        FileStorage.__objects[key] = models.User(**val)
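
The if/elif ladder could also be table-driven; a hypothetical equivalent for the loop body inside reload(), with the same behavior:

        classes = {'BaseModel': models.BaseModel, 'Amenity': models.Amenity,
                   'City': models.City, 'Place': models.Place,
                   'Review': models.Review, 'State': models.State,
                   'User': models.User}
        for key, val in obj.items():
            cls = classes.get(val['__class__'])
            if cls is not None:
                FileStorage.__objects[key] = cls(**val)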
Example #5
 def setUp(self):
     '''
         Sets up the environment for testing DBStorage
     '''
     os.environ['HBNB_TYPE_STORAGE'] = 'db'
     os.environ['HBNB_MYSQL_USER'] = '******'
     os.environ['HBNB_MYSQL_PWD'] = 'hbnb_test_pwd'
     os.environ['HBNB_MYSQL_HOST'] = 'localhost'
     os.environ['HBNB_MYSQL_DB'] = 'hbnb_test_db'
     self.storage = DBStorage()
     self.my_model = models.BaseModel()
     self.storage.reload()
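
A round-trip check that could follow this setUp, assuming the usual DBStorage API in this codebase (new(), save(), and all() keyed by 'Class.id'):

 def test_new_and_save(self):
     '''Persist the BaseModel created in setUp and read it back.'''
     self.storage.new(self.my_model)
     self.storage.save()
     key = 'BaseModel.' + self.my_model.id
     self.assertIn(key, self.storage.all())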
Example #6
 def test_kwargs_base_model(self):
     """Test kwargs on init method"""
     testdict = {
         'created_at': '2018-02-14T04:20:11.699297',
         'updated_at': '2018-02-14T04:20:11.699315',
         '__class__': 'BaseModel',
         'id': '04fac3ec-9ed6-434e-9671-cc47420ebe3d'
     }
     self.testcase = models.BaseModel(**testdict)
     self.assertEqual(self.testcase.id,
                      '04fac3ec-9ed6-434e-9671-cc47420ebe3d')
     self.assertEqual(type(self.testcase.updated_at), datetime)
     self.assertEqual(type(self.testcase.created_at), datetime)
     self.assertEqual(self.testcase.updated_at.isoformat(),
                      '2018-02-14T04:20:11.699315')
     self.assertEqual(self.testcase.created_at.isoformat(),
                      '2018-02-14T04:20:11.699297')
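
The constructor's kwargs path pairs with to_dict(): assuming the AirBnB-clone BaseModel API, a dict dumped from one instance can rebuild an equal copy:

original = models.BaseModel()
copy = models.BaseModel(**original.to_dict())
assert copy.id == original.id
assert copy.created_at == original.created_at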
Example #7
 def do_create(self, arg):
     '''Create a new instance of the given class and save it to the JSON file.'''
     args = arg.split()
     if len(args) < 1:
         print(self.errors['noclass'])
     elif args[0] in self.new_class:
         if args[0] == 'BaseModel':
             new = models.BaseModel()
         elif args[0] == 'Amenity':
             new = models.Amenity()
         elif args[0] == 'City':
             new = models.City()
         elif args[0] == 'Place':
             new = models.Place()
         elif args[0] == 'Review':
             new = models.Review()
         elif args[0] == 'State':
             new = models.State()
         elif args[0] == 'User':
             new = models.User()
         new.save()
         print('{}'.format(new.id))
     else:
         print(self.errors['badclass'])
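
An illustrative way to drive do_create outside the interactive loop; the console class name HBNBCommand is an assumption about this project:

console = HBNBCommand()             # hypothetical cmd.Cmd subclass hosting do_create
console.onecmd('create BaseModel')  # prints the new instance's 36-char id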
Example #8
def train(args, model_id, tb):
    """Train one ensemble member, checkpointing and early-stopping on validation loss."""
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    train_data = MedicalEasyEnsembleDataloader(args.train_data, args.class_id,
                                               args.batch_size, True,
                                               args.num_workers)
    val_data = MedicalEasyEnsembleDataloader(args.val_data, args.class_id,
                                             args.batch_size, False,
                                             args.num_workers)
    if os.path.exists(args.w2v_file):
        embedding = utils.load_embedding(args.w2v_file,
                                         vocab_size=args.vocab_size,
                                         embedding_size=args.embedding_size)
    else:
        embedding = None
    if args.model_type == 'lstm':
        model = models.LSTMModel(args, embedding)
    elif args.model_type == 'conv':
        model = models.ConvModel(args, embedding)
    elif args.model_type == 'char':
        model = models.CharCNNModel(args, embedding)
    elif args.model_type == 'base':
        model = models.BaseModel(args, embedding)
    else:
        raise NotImplementedError
    model_dir = os.path.join(args.checkpoint_path, str(args.class_id),
                             "%s_%s" % (args.model_type, args.type_suffix))
    model_path = os.path.join(model_dir, "model_%d.pth" % model_id)
    if os.path.isfile(model_path):
        print("Loading class %d %s model %d from a previous run" %
              (args.class_id, args.model_type, model_id))
        model.load_state_dict(torch.load(model_path))
    iteration = 0
    model = model.cuda(args.device)
    model.train()
    optimizer = utils.build_optimizer(args, model)
    loss_func = MultiBceLoss()
    cur_worse = 1000
    bad_times = 0
    for epoch in range(args.epochs):
        if epoch >= args.start_epoch:
            factor = (epoch - args.start_epoch) // args.decay_every
            decay_factor = args.decay_rate**factor
            current_lr = args.lr * decay_factor
            utils.set_lr(optimizer, current_lr)
        # if epoch != 0 and epoch % args.sample_every == 0:
        #     train_data.re_sample()
        for i, data in enumerate(train_data):
            tmp = [
                _.cuda(args.device) if isinstance(_, torch.Tensor) else _
                for _ in data
            ]
            report_ids, sentence_ids, sentence_lengths, output_vec = tmp
            optimizer.zero_grad()
            loss = loss_func(model(sentence_ids, sentence_lengths), output_vec)
            loss.backward()
            train_loss = loss.item()
            optimizer.step()
            iteration += 1
            if iteration % args.print_every == 0:
                print("iter %d epoch %d loss: %.3f" %
                      (iteration, epoch, train_loss))

            if iteration % args.save_every == 0:
                torch.save(model.state_dict(), model_path)
                with open(os.path.join(args.checkpoint_path,
                                       str(args.class_id), "config.json"),
                          'w',
                          encoding='utf-8') as config_f:
                    json.dump(vars(args), config_f, indent=2)
                with open(os.path.join(model_dir, "config.json"),
                          'w',
                          encoding='utf-8') as config_f:
                    json.dump(vars(args), config_f, indent=2)
            if iteration % args.val_every == 0:
                val_loss = eval_model(model, loss_func, val_data, epoch)
                tb.add_scalar("model_%d val_loss" % model_id, val_loss,
                              iteration)
                if val_loss > cur_worse:
                    print("Bad Time Appear")
                    cur_worse = val_loss
                    bad_times += 1
                else:
                    cur_worse = val_loss
                    bad_times = 0
                if bad_times > args.patient:
                    print('Early stopping')
                    return
            if iteration % args.loss_log_every == 0:
                tb.add_scalar("model_%d train_loss" % model_id, loss.item(),
                              iteration)

    print("The train finished")
Example #9
def show_candidates(name):
    '''
    Present an entity and its candidate links for annotation.
    '''
    # Load json data from file
    path = os.path.join(abs_path, 'users', name, 'art.json')
    data = json.load(codecs.open(path, 'r', 'utf-8'))

    # Get instance index or id
    no_instances = len(data['instances'])
    last_instance = no_instances - 1

    if request.query.id:
        index = None
        for pos, instance in enumerate(data['instances']):
            if instance['id'] == int(request.query.id):
                index = pos
                break
        if index is None:
            abort(500, 'Identifier not found in dataset.')
    elif request.query.index:
        index = int(request.query.index)
    else:
        # First instance that hasn't been linked yet
        index = 0
        for pos, instance in enumerate(data['instances']):
            if not instance['links']:
                index = pos
                break
    if index >= no_instances:
        index = 0
    if index <= -1:
        index = last_instance

    # Get instance data
    instance = data['instances'][index]
    instance_id = instance['id']
    url = instance['url']
    ne = instance['ne_string']
    ne_type = instance['ne_type']
    links = instance['links']

    # Get OCR and publication year
    context = dac.Context(url)
    ocr = re.sub(
        r'(?P<pf>(^|\W))' + re.escape(ne) + r'(?P<sf>(\W|$))',
        r'\g<pf><span style="background-color:yellow;">' + ne +
        r'</span>\g<sf>', context.ocr)

    # Get candidates
    cluster = dac.Cluster([dac.Entity(ne, tpta_type=ne_type, context=context)])

    if cluster.entities[0].valid:
        model = models.BaseModel()
        cand_list = dac.CandidateList(cluster, model)
        candidates = cand_list.candidates
    else:
        candidates = []

    return template('index',
                    last_instance=last_instance,
                    index=index,
                    instance_id=instance_id,
                    url=url,
                    ne=ne,
                    ne_type=ne_type,
                    publ_date=context.publ_year,
                    ocr=ocr,
                    links=links,
                    candidates=candidates)
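
The OCR highlighting step pulled out as a standalone helper (a hypothetical refactor of the same regex logic, using raw strings):

import re

def highlight(text, ne):
    """Wrap each standalone occurrence of ne in a yellow <span>."""
    pattern = r'(?P<pf>(^|\W))' + re.escape(ne) + r'(?P<sf>(\W|$))'
    repl = (r'\g<pf><span style="background-color:yellow;">' +
            ne + r'</span>\g<sf>')
    return re.sub(pattern, repl, text)

print(highlight('Visited Amsterdam today.', 'Amsterdam'))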
Example #10
 def test_id_base_model(self):
     """Test id created correctly and is unique every time"""
     self.assertEqual(len(self.testcase.id), 36)
     self.testcase2 = models.BaseModel()
     self.assertNotEqual(self.testcase.id, self.testcase2.id)
     self.testcase2 = None
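
The 36-character assertion matches the canonical str(uuid.uuid4()) layout, which is presumably what BaseModel uses for its ids (an assumption about this project):

import uuid

u = str(uuid.uuid4())
assert len(u) == 36       # 32 hex digits plus 4 hyphens
assert u.count('-') == 4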