예제 #1
0
    def finish(engine):
        """Release every runtime resource still recorded on ``engine``.

        For each of ``bg_task_id``, ``pid`` and ``gid`` that is set, the
        matching resource is released (background task revoked, process
        killed, GPU freed) and the field is reset to ``None``. The database
        session is committed once at the end.

        Args:
            engine: object exposing ``bg_task_id``, ``pid`` and ``gid``
                attributes (presumably an ``Engine`` model row; confirm
                against the caller).
        """
        if engine.bg_task_id is not None:
            revoke(engine.bg_task_id, terminate=True)
            engine.bg_task_id = None

        if engine.pid is not None:
            # Imported locally so this method does not depend on the module's
            # import list.
            import os
            import signal

            try:
                # Signal the process directly instead of spawning
                # `kill -9 <pid>` through a shell: no shell string to build
                # and no un-reaped child process left behind.
                os.kill(int(engine.pid), signal.SIGKILL)
            except (ProcessLookupError, PermissionError):
                # Best-effort, as before: the process may already be gone or
                # may not be ours to signal; cleanup must still continue.
                pass
            engine.pid = None

        if engine.gid is not None:
            GPUManager.free_device(engine.gid)
            engine.gid = None

        db.session.commit()
예제 #2
0
def train_engine(self, engine_id, is_admin, max_seconds=3600,
                 poll_interval=10):
    """Train an engine by running JoeyNMT and tracking its progress.

    Marks the engine as "launching", waits for a GPU, starts
    ``python3 -m joeynmt train`` on the engine's ``config.yaml`` and polls
    the process until it exits or the time limit is reached. The GPU is
    released exactly once, in the ``finally`` clause, whichever way the
    function leaves.

    Args:
        self: unused in the body (presumably the bound task instance;
            confirm against the decorator, which is not visible here).
        engine_id: primary key of the ``Engine`` row to train.
        is_admin: forwarded to ``GPUManager.wait_for_available_device``.
        max_seconds: training time limit in seconds (default: 1 hour).
        poll_interval: seconds to sleep between process status checks.
    """
    engine = Engine.query.filter_by(id=engine_id).first()
    engine.status = "launching"
    db.session.commit()

    # NOTE(review): other code in this project checks the result of
    # wait_for_available_device for None; here a None would be passed on as
    # CUDA_VISIBLE_DEVICES="None". Confirm whether that case can happen.
    gpu_id = GPUManager.wait_for_available_device(is_admin=is_admin)

    # Everything after the device is acquired lives inside the try block so
    # that a failure anywhere (including the commit below) still frees it.
    try:
        engine.gid = gpu_id
        db.session.commit()

        env = os.environ.copy()
        env["CUDA_VISIBLE_DEVICES"] = "{}".format(gpu_id)
        running_joey = subprocess.Popen(
            [
                "python3", "-m", "joeynmt", "train", "-t",
                os.path.join(engine.path, "config.yaml")
            ],
            cwd=app.config['JOEYNMT_FOLDER'],
            env=env)

        engine.status = "training"
        engine.pid = running_joey.pid
        db.session.commit()

        # Monotonic clock: the time limit must not be affected by wall-clock
        # adjustments while the training is running.
        start = time.monotonic()
        elapsed = 0

        while elapsed < max_seconds:
            time.sleep(poll_interval)
            elapsed = time.monotonic() - start
            if running_joey.poll() is not None:
                # JoeyNMT finished (or died) before timeout
                db.session.refresh(engine)
                if engine.status not in ("stopped", "stopped_admin"):
                    Trainer.stop(engine_id)
                # The device is released in the finally clause below;
                # freeing it here as well would release it twice.
                return

        # Time limit reached: stop the training if it is still running.
        if running_joey.poll() is None:
            Trainer.stop(engine_id)
    finally:
        # NOTE(review): this overwrites whatever status is set at this point,
        # including "stopped_admin"; kept as in the original, confirm intent.
        engine.status = "stopped"
        GPUManager.free_device(gpu_id)
        # Clear the recorded device so a later cleanup of this engine does
        # not free a GPU it no longer owns.
        engine.gid = None
        db.session.commit()
예제 #3
0
    def load(self):
        """Build the model, restore its weights and optionally move it to a GPU.

        The checkpoint at ``self.ckpt`` is loaded, a model is built from
        ``self.model_data`` and the two vocabularies, and the checkpoint's
        ``"model_state"`` entry is loaded into it. When ``self.use_cuda`` is
        set, a device is requested from ``GPUManager`` and the model is moved
        onto it.

        Returns:
            bool: ``False`` when CUDA was requested but no device was
            obtained, ``True`` otherwise.
        """
        checkpoint = load_checkpoint(self.ckpt, self.use_cuda)
        self.model = build_model(
            self.model_data,
            src_vocab=self.src_vocab,
            trg_vocab=self.trg_vocab,
        )
        self.model.load_state_dict(checkpoint["model_state"])

        # CPU-only: nothing else to do.
        if not self.use_cuda:
            return True

        self.gpu_id = GPUManager.wait_for_available_device(
            is_admin=self.is_admin)
        if self.gpu_id is None:
            # No device could be obtained.
            return False

        self.model.cuda(self.gpu_id)
        return True
예제 #4
0
 def __del__(self):
     """Free the GPU held by this object, if any, when it is finalized."""
     # A finalizer can run on a partially initialised instance, so the
     # attribute may not exist yet; treat that the same as "no device".
     gpu_id = getattr(self, "gpu_id", None)
     if gpu_id is not None:
         GPUManager.free_device(gpu_id)
         # Forget the device so it cannot be released a second time.
         self.gpu_id = None
예제 #5
0
파일: __init__.py 프로젝트: Prompsit/mutnmt
# Remove every RunningEngines row (presumably entries left over from a
# previous run; confirm). All deletions are committed together instead of
# issuing one commit per row.
for running_engine in models.RunningEngines.query.all():
    db.session.delete(running_engine)
db.session.commit()

# Topic names that must exist in the database.
TOPICS = [
    "General", "Technical", "Legal", "Financial", "Medical", "Religion",
    "Politics", "Administrative", "Subtitles", "Patents", "News", "Books",
    "Other"
]

# Insert only the topics that are not stored yet, then commit once.
for topic in TOPICS:
    if models.Topic.query.filter_by(name=topic).first() is None:
        topic_obj = models.Topic(name=topic)
        db.session.add(topic_obj)
db.session.commit()

# Configuration keys whose values are directories that must exist.
folders = [
    'USERSPACE_FOLDER', 'STORAGE_FOLDER', 'FILES_FOLDER', 'ENGINES_FOLDER',
    'USERS_FOLDER'
]

for folder in folders:
    # Create the directory (and any missing parents) if it is not there yet.
    # This replaces a bare `except:` around os.stat that treated every
    # error, whatever its cause, as "directory is missing".
    os.makedirs(app.config[folder], exist_ok=True)

# Imported here rather than at the top of the module, as in the original
# (presumably to avoid a circular import; confirm before moving it).
from app.utils.GPUManager import GPUManager
GPUManager.scan_devices(reset=True, is_admin=True)