def load_books_vecs(df):
    """Return sentence vectors for the books in ``df``, cached on disk.

    If a cache file exists at ``path_`` and holds as many rows as ``df``, the
    cached array is returned as-is. Otherwise each row's ``title`` and ``text``
    are joined with a newline, encoded via ``nlp_.sentence_encode``, saved to
    ``path_`` and returned.
    """
    if exists(path_):
        with open(path_, 'rb') as cache_file:
            cached = np.load(cache_file)
        # A row-count mismatch means the books table has changed since the
        # cache was written, so fall through and recompute.
        if df.shape[0] == cached.shape[0]:
            return cached

    logger.info(f"Running BERT on {df.shape[0]} entries")
    combined = df.title + '\n' + df.text
    texts = combined.tolist()

    # Imported at call time rather than module level; presumably to avoid
    # loading the NLP stack unless a recompute is needed -- TODO confirm.
    from app.nlp import nlp_
    embeddings = nlp_.sentence_encode(texts)
    nlp_.clear()

    with open(path_, 'wb') as cache_file:
        np.save(cache_file, embeddings)
    return embeddings
def run_job(job):
    """Run a single queued job.

    The job's ``data_in`` payload is read from the ``jobs`` table and its
    ``args`` / ``kwargs`` entries are used to invoke the handler:

    * ``'entries'`` and ``'profiles'`` jobs additionally receive the job id as
      the ``job_id`` keyword argument.
    * ``'books'`` jobs call ``nlp_.clear()`` and then run ``app/books.py`` in a
      separate Python process; nothing is returned or wrapped for them.
    * Every other method is looked up in ``m`` and executed through
      ``M.Job.wrap_job``.

    Args:
        job: Object exposing ``id`` and ``method`` attributes.
    """
    # Local import so this block does not depend on a module-level import.
    import subprocess

    job_id, method = str(job.id), job.method

    with session() as sess:
        data = sess.execute(
            "select data_in from jobs where id=:jid", {'jid': job_id}
        ).fetchone().data_in

    args = data.get('args', [])
    kwargs = data.get('kwargs', {})

    if method in ('entries', 'profiles'):
        kwargs['job_id'] = job_id

    if method == 'books':
        nlp_.clear()
        # Pass an argument list instead of a shell string: args[0] comes from
        # the stored job payload and must never be interpreted by a shell.
        command = [
            "python",
            "app/books.py",
            f"--jid={job_id}",
            f"--uid={args[0]}",
        ]
        try:
            # No check=True: like os.system, a non-zero exit status is ignored.
            subprocess.run(command)
        except OSError:
            # os.system never raised when the program could not be started;
            # keep that best-effort behaviour but leave a trace in the log.
            logger.exception("Could not launch app/books.py")
        return

    def fn():
        return m[method](*args, **kwargs)

    M.Job.wrap_job(job_id, method, fn)
if __name__ == '__main__':
    # Log the CUDA environment once at startup.
    logger.info(f"torch.cuda.current_device() {torch.cuda.current_device()}")
    logger.info(f"torch.cuda.device(0) {torch.cuda.device(0)}")
    logger.info(f"torch.cuda.device_count() {torch.cuda.device_count()}")
    logger.info(
        f"torch.cuda.get_device_name(0) {torch.cuda.get_device_name(0)}")
    logger.info(f"torch.cuda.is_available() {torch.cuda.is_available()}")
    logger.info("\n\n")

    # Poll for work forever, roughly once per second, on a single session.
    with session() as sess:
        while True:
            M.Machine.notify_online(sess, vars.MACHINE)
            cloud_down_maybe(sess)

            # Only allow 2 jobs at a time: when this machine is already at
            # the limit, skip straight to the sleep below.
            running_jobs = M.Machine.job_ct_on_machine(sess, vars.MACHINE)
            if running_jobs < 2:
                # Find jobs
                job = M.Job.take_job(sess, "run_on='gpu'")
                if job:
                    # aaf1ec95: multiprocessing.Process for problem models
                    worker = threading.Thread(target=run_job, args=(job,))
                    worker.start()

                # Call nlp_.clear() when the last user check-in value exceeds
                # 10 (units not visible here -- TODO confirm), in prod only.
                if M.User.last_checkin(sess) > 10 and is_prod():
                    nlp_.clear()

            time.sleep(1)
def clear_sess(db):
    """Call ``nlp_.clear()`` and then delete every row from ``jobs``.

    Args:
        db: Database handle providing ``execute`` and ``commit``; the delete
            is committed before this function returns.
    """
    nlp_.clear()

    purge_jobs_sql = "delete from jobs"
    db.execute(purge_jobs_sql)
    db.commit()