class Spawner:
    """Execute one queued notebook in the user's container and publish the result.

    ``Execute`` is the entry point. It sets three instance attributes that the
    helper methods read: ``repo`` (the user's "plutons" GitHub repository),
    ``plutoPath`` (local scratch directory for this request) and ``nbFileName``
    (local path of the notebook being processed).
    """

    def __init__(self):
        """Load configuration and open the Mongo queue database."""
        self.config = Config()
        client = pymongo.MongoClient(f"mongodb+srv://explorer:{self.config.MongoPass}@cluster0-eyzcm.mongodb.net/test?retryWrites=true&w=majority")
        self.db = client.plutoQ
        self.cm = ContainerManager()
        self.status = Status()

    @staticmethod
    def _splitGithubPath(fullPath):
        """Split a repository path into ``(directory, fileName)``.

        ``directory`` is ``''`` when the file sits at the repository root
        (no ``/`` in ``fullPath``).
        """
        dirName, _, fileName = fullPath.rpartition('/')
        return dirName, fileName

    @staticmethod
    def _joinGithubPath(dirName, fileName):
        """Join a repository directory and file name; an empty directory adds no slash."""
        return f"{dirName}/{fileName}" if dirName else fileName

    @staticmethod
    def _isTooLarge(exp):
        """Return True if ``exp`` carries a GitHub error payload with code 'too_large'.

        Any exception without the expected ``data['errors']`` structure is
        treated as "not too large" instead of raising inside the handler.
        """
        data = getattr(exp, 'data', None)
        if not isinstance(data, dict):
            return False
        errors = data.get('errors')
        if not isinstance(errors, list):
            return False
        return any(isinstance(err, dict) and err.get('code') == 'too_large' for err in errors)

    def _clearOutputs(self):
        """Strip every cell output from the local notebook, in place, via nbconvert."""
        # argv is passed as a list so a file name containing spaces or quotes
        # stays a single argument.
        subprocess.run(
            ["jupyter", "nbconvert", "--ClearOutputPreprocessor.enabled=True", "--inplace", self.nbFileName],
            env=os.environ)

    def insertRef(self):
        """Append a markdown attribution cell unless one mentioning pluto.studio exists."""
        nbDoc = nbformat.read(self.nbFileName, as_version=4)
        nbCells = nbDoc['cells']
        hasRef = False
        for mdc in nbCells:
            if mdc['cell_type'] != 'markdown':
                continue
            print(mdc['source'])
            if 'pluto.studio' in mdc['source']:
                hasRef = True
                break

        if not hasRef:
            outObj = nbformat.NotebookNode(cell_type='markdown', metadata={}, source=["This notebook was created using [pluto](http://pluto.studio). Learn more [here](https://github.com/shyams80/pluto)"])
            nbCells.append(outObj)
            nbformat.write(nbDoc, self.nbFileName, version=4)

    def getOutputLength(self):
        """Return the total length of all stdout stream outputs of the code cells."""
        nbDoc = nbformat.read(self.nbFileName, as_version=4)
        textLength = 0
        for nbCell in nbDoc['cells']:
            if nbCell['cell_type'] != 'code':
                continue
            for nbOut in nbCell['outputs']:
                if nbOut['output_type'] == 'stream' and nbOut.get('name') == 'stdout':
                    textLength += len(nbOut['text'])
        return textLength

    def upsertGithub(self, diskFileName, githubFileName):
        """Upload a local file to ``self.repo``, updating it if present, creating it on 404.

        Failures are printed and swallowed (best effort), as before.
        """
        print(f"upserting: {diskFileName} to {githubFileName}")
        with open(diskFileName, mode='rb') as file:
            outFileContent = file.read()

        try:
            fileContent = self.repo.get_contents(githubFileName)
            self.repo.update_file(githubFileName, "response", outFileContent, fileContent.sha)
        except Exception as exp:
            print(exp)
            # Not every exception has a ``status`` attribute; a missing one
            # simply means "not a 404".
            if getattr(exp, 'status', None) == 404:
                try:
                    self.repo.create_file(githubFileName, "response", outFileContent)
                except Exception as exp2:
                    print("Error creating file on github: " + githubFileName)
                    print(exp2)

    def Execute(self, meta):
        """Process the queued request described by ``meta``.

        ``meta`` is a dict with keys ``'id'`` (string form of the request's
        Mongo ``_id``) and ``'githubUser'``. The notebook stored with the
        request is executed inside the user's container, pushed back to the
        user's "plutons" repository, and the request is marked processed.
        If GitHub rejects the notebook as too large, a markdown rendering
        (plus its ``_files`` directory) is uploaded next to it instead.
        """
        print(meta)
        qId = ObjectId(meta['id'])

        print('acquiring egg')
        self.status.Update(qId, 'acquiring egg')
        egg = self.cm.GetProcessor(qId, meta['githubUser'])

        request = self.db.q.find_one({'_id': qId})
        githubUserName = request['githubUser']
        print(f"processing for: {githubUserName}")
        self.status.Update(qId, 'processing')

        githubAcc = Github(request['githubTok'])
        user = githubAcc.get_user()
        self.repo = user.get_repo("plutons")

        self.plutoPath = "/home/pluto/notebook-temp/" + meta['id'] + "/"
        os.makedirs(self.plutoPath, exist_ok=True)

        fullPath = request['file']
        notebook = gzip.decompress(request['notebook'])
        # rpartition keeps root-level files intact (slicing on rfind() == -1
        # would drop the last character of the path).
        githubPath, githubFileName = self._splitGithubPath(fullPath)
        remoteNotebook = f"/home/pluto/{githubFileName}"

        self.nbFileName = self.plutoPath + githubFileName
        print(f"processing notebook: {self.nbFileName}")
        with open(self.nbFileName, mode='wb') as file:
            file.write(notebook)

        # Start from a clean notebook, tag it, then ship it to the container.
        self._clearOutputs()
        self.insertRef()
        egg.files.recursive_put(self.plutoPath, "/home/pluto/")

        print("executing in egg")
        self.status.Update(qId, 'executing in egg')
        egg.execute(["jupyter", "nbconvert", "--to", "notebook", "--execute", remoteNotebook,
                     "--inplace", "--allow-errors", "--ExecutePreprocessor.timeout=1200"])

        resp = egg.files.get(remoteNotebook)
        with open(self.nbFileName, mode='wb') as file:
            file.write(resp)

        textLength = self.getOutputLength()
        print(f"total output length: {textLength}")
        if githubUserName != 'shyams80' and textLength > 10000:
            # Over the limit: drop all outputs and leave a single warning on
            # the first code cell.
            self._clearOutputs()
            nbDoc = nbformat.read(self.nbFileName, as_version=4)
            for nbCell in nbDoc['cells']:
                if nbCell['cell_type'] != 'code':
                    continue
                nbCell['execution_count'] = 1
                outObj = nbformat.NotebookNode(output_type='stream', name='stderr', text=['total output string length exceeded 10000 characters. please stay within the limit.'])
                nbCell['outputs'].append(outObj)
                break
            nbformat.write(nbDoc, self.nbFileName, version=4)

        with open(self.nbFileName, mode='rb') as file:
            outFileContent = file.read()

        tooBig = False
        try:
            fileContent = self.repo.get_contents(fullPath)
            self.repo.update_file(fullPath, "response", outFileContent, fileContent.sha)
        except Exception as exp:
            print(exp)
            tooBig = self._isTooLarge(exp)

        if tooBig:
            print("file is too big!")
            self.status.Update(qId, 'file is too big!')
            egg.execute(["jupyter", "nbconvert", "--to", "markdown", "--execute", remoteNotebook,
                         "--allow-errors", "--ExecutePreprocessor.timeout=1200"])
            filePattern = githubFileName.replace(".ipynb", "")

            self.status.Update(qId, 'creating markdown...')
            egg.execute(["./tard.sh", filePattern])
            resp = egg.files.get(f"/home/pluto/{filePattern}.tar.gz")
            localArchive = f"{self.plutoPath}{filePattern}.tar.gz"
            with open(localArchive, mode='wb') as file:
                file.write(resp)
            subprocess.run(["tar", "xvf", localArchive, "-C", self.plutoPath], env=os.environ)

            self.status.Update(qId, 'uploading markdown...')
            self.upsertGithub(f"{self.plutoPath}{filePattern}.md",
                              self._joinGithubPath(githubPath, f"{filePattern}.md"))

            imagesDir = f"{self.plutoPath}{filePattern}_files"
            if os.path.isdir(imagesDir):
                self.status.Update(qId, 'uploading images...')
                for fname in os.listdir(imagesDir):
                    self.upsertGithub(f"{imagesDir}/{fname}",
                                      self._joinGithubPath(githubPath, f"{filePattern}_files/{fname}"))

        egg.files.delete(remoteNotebook)
        shutil.rmtree(self.plutoPath)

        self.db.q.update_one({'_id': qId}, {'$set': {'isProcessed': True, 'processedOn': datetime.now(), 'notebook': gzip.compress(outFileContent)}})
        self.status.Update(qId, 'finished')
class Enqueuer:
    """Move pending requests from the Mongo queue onto the 'pluto' job queue.

    Also prunes old requests and unused containers (``Cleanup``) and marks
    requests whose job failed (``FailLoop``).
    """

    def __init__(self):
        """Load configuration and connect to the job queue and the Mongo queue database."""
        self.config = Config()
        self.metaQ = Queue('pluto', connection=Redis('windows', 6379, db=1), default_timeout=1 * 3600)
        self.status = Status()

        client = pymongo.MongoClient(
            f"mongodb+srv://explorer:{self.config.MongoPass}@cluster0-eyzcm.mongodb.net/test?retryWrites=true&w=majority"
        )
        self.db = client.plutoQ
        self.cm = ContainerManager()

    def Cleanup(self):
        """Delete requests created more than 30 days ago, then drop containers of users with no requests left."""
        cutoff = datetime.now() - timedelta(days=30)
        dmRet = self.db.q.delete_many({'createdOn': {'$lt': cutoff}})
        print(f"deleted {dmRet.deleted_count} from mongo queue")

        activeUsers = self.db.q.distinct('githubUser')
        self.cm.KeepOnly(activeUsers)

    def Queue(self, meta):
        """Enqueue a ``Spawn`` job for ``meta``; the job result is not kept (``result_ttl=0``)."""
        self.metaQ.enqueue(Spawn, meta, result_ttl=0)

    def Loop(self):
        """Pick one not-yet-enqueued request, flag it as enqueued and queue a job for it.

        A request qualifies when ``isEnqueued`` is False, or when it has no
        ``isEnqueued`` field and ``isProcessed`` is False. Returns ``None``
        without side effects when nothing qualifies.
        """
        request = self.db.q.find_one({
            '$or': [{
                '$and': [{
                    'isEnqueued': {
                        '$exists': True
                    }
                }, {
                    'isEnqueued': False
                }]
            }, {
                '$and': [{
                    'isEnqueued': {
                        '$exists': False
                    }
                }, {
                    'isProcessed': False
                }]
            }]
        })
        if request is None:
            return

        qId = ObjectId(request['_id'])
        self.db.q.update_one(
            {'_id': qId},
            {'$set': {
                'isEnqueued': True,
                'enqueuedOn': datetime.now()
            }})
        self.status.Update(qId, 'queued')

        meta = {"id": str(request['_id']), "githubUser": request['githubUser']}
        self.Queue(meta)

    def FailLoop(self):
        """Mark the request behind every failed job as processed and clear the failure.

        For each id in the failed-job registry the matching request gets
        ``isProcessed=True`` and a 'system failure' status; the job is then
        removed from the queue and from the registry.
        """
        registry = self.metaQ.failed_job_registry
        print(
            f"total number of failed jobs: {registry.count}"
        )
        for failedId in registry.get_job_ids():
            failedJob = self.metaQ.fetch_job(failedId)
            if failedJob is None:
                # The job record no longer exists, so the request it belonged
                # to cannot be identified. Drop the stale id so it does not
                # crash this loop on every pass.
                self.metaQ.remove(failedId)
                registry.remove(failedId)
                continue

            failedMeta = failedJob.args[0]
            print(failedMeta)
            qId = ObjectId(failedMeta['id'])
            self.db.q.update_one(
                {'_id': qId},
                {'$set': {
                    'isProcessed': True,
                    'processedOn': datetime.now()
                }})
            self.status.Update(qId, 'system failure. try again!')
            self.metaQ.remove(failedId)
            registry.remove(failedJob)
class ContainerManager:
    """Manage per-user LXD containers ("eggs") cloned from the 'goose' container.

    The container named 'goose' is the master copy: it is published as an
    image aliased 'goose', and each user's container is created from that
    image. The goose itself can never be requested or deleted through this
    class.
    """

    def __init__(self):
        """Load configuration and connect to the local LXD daemon."""
        self.config = Config()
        self.client = pylxd.Client()
        self.status = Status()

    def _gooseTemplates(self):
        """Return the images whose first alias is named 'goose'."""
        return [
            img for img in self.client.images.all()
            if len(img.aliases) > 0 and img.aliases[0]['name'] == 'goose'
        ]

    def DeleteProcessor(self, githubUserName):
        """Stop (if needed) and delete the user's container; a missing container is ignored.

        Raises:
            Exception: if asked to delete the 'goose' container.
        """
        if githubUserName == "goose":
            raise Exception("maverick!")

        try:
            cntnr = self.client.containers.get(githubUserName)
            # 102 is the LXD status code for "Stopped".
            if cntnr.state().status_code != 102:
                cntnr.stop(wait=True)
            cntnr.delete(wait=True)
        except pylxd.exceptions.NotFound:
            pass

    def KeepOnly(self, githubUserNames):
        """Delete every container whose name is not in ``githubUserNames``.

        The 'goose' container is always kept. Its absence from the container
        list is tolerated.
        """
        existing = {cntnr.name for cntnr in self.client.containers.all()}
        # discard() instead of list.remove(): no ValueError when goose is absent.
        existing.discard('goose')
        for name in existing.difference(githubUserNames):
            self.DeleteProcessor(name)

    def GetProcessor(self, qId, githubUserName):
        """Return a started container for ``githubUserName``, creating it when needed.

        Progress is reported through ``self.status.Update(qId, ...)``. If no
        'goose' image exists, the goose container is stopped, published as a
        new image and restarted first. A user container created before the
        current goose image was uploaded is deleted and re-created.

        Raises:
            Exception: if ``githubUserName`` is 'goose'.
            pylxd.exceptions.NotFound: if the lookup failed before a goose
                image could be determined (e.g. the goose container itself
                is missing), so there is nothing to create the egg from.
        """
        if githubUserName == "goose":
            raise Exception("maverick!")

        egg = None
        template = None
        try:
            # The goose image is deleted whenever pluto is updated or new
            # libraries are installed, so it may have to be re-published.
            templates = self._gooseTemplates()
            if len(templates) == 0:
                self.status.Update(qId, 'cloning the goose... can take a while')
                goose = self.client.containers.get('goose')
                if goose.state().status_code != 102:
                    goose.stop(wait=True)
                published = goose.publish(public=True, wait=True)
                published.add_alias('goose', 'lays golden eggs')
                goose.start(wait=True)
                self.status.Update(qId, 'goose is loose!')
                templates = self._gooseTemplates()

            template = templates[0]
            egg = self.client.containers.get(githubUserName)

            # An egg older than the goose image is stale: delete it and fall
            # through to the creation path below.
            if egg.created_at < template.uploaded_at:
                self.DeleteProcessor(githubUserName)
                raise pylxd.exceptions.NotFound(response="redo")

            if egg.state().status_code == 102:
                egg.start(wait=True)
            self.status.Update(qId, 'egg initialized')
        except pylxd.exceptions.NotFound:
            if template is None:
                # NotFound came from the goose lookup, not the egg lookup.
                # Without an image there is nothing to build from, so
                # surface the original error.
                raise

            # Create an egg from the goose image.
            dna = {
                "ephemeral": False,
                "name": githubUserName,
                "source": {
                    "type": "image",
                    "certificate": "",
                    "fingerprint": template.fingerprint
                }
            }
            self.status.Update(qId, 'laying an egg... can take a while')
            egg = self.client.containers.create(dna, wait=True)
            egg.start(wait=True)
            self.status.Update(qId, 'egg laid')

            # Write a hosts file that maps each configured server to its
            # current address.
            servers = self.config['DEFAULT']['SERVERS'].split(',')
            hosts = "127.0.0.1 localhost\n" + "".join(
                f"{socket.gethostbyname(server)} {server}\n" for server in servers)
            egg.files.put("/etc/hosts", hosts)

            # Set up the firewall.
            ret = egg.execute(["/root/start.sh"])
            print(ret.exit_code)
            self.status.Update(qId, 'egg initialized')

        return egg