def render_task(dburl, docpath, slug):
    """Render a document."""
    oldcwd = os.getcwd()
    try:
        os.chdir(os.path.join(docpath, slug))
    except OSError:
        db = StrictRedis.from_url(dburl)
        job = get_current_job(db)
        job.meta.update({'out': 'Document not found.', 'return': 127, 'status': False})
        job.save()  # persist the error meta before bailing out
        return 127
    db = StrictRedis.from_url(dburl)
    job = get_current_job(db)
    job.meta.update({'out': '', 'milestone': 0, 'total': 1, 'return': None, 'status': None})
    job.save()
    p = subprocess.Popen(('lualatex', '--halt-on-error', slug + '.tex'),
                         stdout=subprocess.PIPE)
    out = []
    while p.poll() is None:
        nl = p.stdout.readline()
        out.append(nl)
        job.meta.update({'out': ''.join(out), 'return': None, 'status': None})
        job.save()
    out = ''.join(out)
    job.meta.update({'out': out, 'return': p.returncode, 'status': p.returncode == 0})
    job.save()
    os.chdir(oldcwd)
    return p.returncode

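# --- Usage sketch (not part of the source): enqueueing render_task and tailing its
# output through job.meta. The Redis URL, queue name and document paths below are
# illustrative assumptions; only the render_task signature above is taken as given.
import time
from redis import Redis
from rq import Queue

redis_url = 'redis://localhost:6379/0'          # assumed Redis instance
queue = Queue('render', connection=Redis.from_url(redis_url))
job = queue.enqueue(render_task, redis_url, '/srv/docs', 'example-doc')

while not (job.is_finished or job.is_failed):
    job.refresh()                               # reload meta written by the worker
    print(job.meta.get('out', ''))              # incremental lualatex output
    time.sleep(1)
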
def setup_container(d_os):
    cur_job = get_current_job()
    cur_job.meta['ownername'] = str(d_os['username'])
    cur_job.meta['request_status'] = "Performing status check"
    cur_job.save()
    cur_job.refresh()
    if d_os['ct_type'] == "openvz":
        cmd = ("vzctl set " + d_os['cid'] + " --ipadd " + d_os['ipadd'] +
               " --hostname " + d_os['hname'] + " --nameserver " + d_os['nserver'] +
               " --userpasswd " + d_os['usr'] + ":" + d_os['pwd'] + " --save")
        print(cmd)
        out = check_output(shlex.split(cmd))
    elif d_os['ct_type'] == "aws_vm":
        # Create a file under /home/laks/tmp/tutorials named after the instance IP.
        fpathname = "/home/laks/tmp/tutorials/" + str(d_os['instance'].ip_address) + ".json"
        fd = open(fpathname, "w")
        fd.write(d_os['tutorial'])
        fd.close()
        spoty.install_sw(d_os['instance'], d_os['repo'])
        findreplace = [
            ("SUBDOMAIN", d_os['username']),
            ("IPADDRESS", d_os['instance'].ip_address),
        ]
        creat_nginx_tmpl(findreplace, d_os)
        reload_nginx()
    else:
        print("setting up subdomain for user " + str(d_os['username']))
        cmd = "docker inspect --format '{{ .NetworkSettings.IPAddress }}' " + str(d_os['imgid'])
        proc = subprocess.Popen(shlex.split(cmd),
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        ipaddr, err = proc.communicate()
        print(ipaddr)
        if d_os['code'] == 1:
            findreplace = [
                ("SUBDOMAIN", d_os['username'] + str(d_os['code'])),
                ("IPADDRESS", ipaddr),
            ]
        else:
            findreplace = [
                ("SUBDOMAIN", d_os['username']),
                ("IPADDRESS", ipaddr),
            ]
        creat_nginx_tmpl(findreplace, d_os)
        reload_nginx()
        time.sleep(2)
        setup_docker_ct_helper(d_os)
    cur_job = get_current_job()
    cur_job.meta['ownername'] = str(d_os['username'])
    cur_job.meta['request_status'] = "Running, please login"
    cur_job.save()
    cur_job.refresh()
    # Hand off to the next queue.
    if d_os['proceed_nextq']:
        with Connection(Redis()):
            q = Queue('startq', default_timeout=15000)
            job = q.enqueue_call(func=start_container, args=(d_os,), result_ttl=600)

def __init__(self):
    self.job = get_current_job()
    self.timeout = 60 * 10
    self.start_date = time.time()
    # Busy-wait until the enqueuing side has written the mongo_id into the job meta.
    while 'mongo_id' not in self.job.meta:
        self.job = get_current_job()
    self.mongo_id = ObjectId(self.job.meta['mongo_id'])
    file_ = inspect.getfile(self.__class__)
    self.name = os.path.basename(file_).split('.')[0]
    self.result = "Ok"
    self._do_run = True
    self.run()

def add_job(username, domain, project):
    _job = get_current_job()
    payload = {'project': project, 'username': username, 'spider': domain, 'jobid': _job.id}
    req = urllib2.urlopen(scrapyd_uri, data=urllib.urlencode(payload))
    if req.getcode() != 200:
        raise Exception
    # Poll our own job meta until another process marks it with a status.
    while True:
        job = get_current_job()
        print('job waiting. jobid: %s, meta: %s' % (job.id, job.meta))
        if 'status' in job.meta:
            return
        time.sleep(5)

def _poolJobs(self, db_name, check=False):
    """Check if we are a worker process."""
    if get_current_connection() and get_current_job():
        # Running inside an RQ worker: skip the regular cron pooling.
        pass
    else:
        super(IrCron, self)._poolJobs(db_name, check)

def build_pkg_handler():
    """Pop the next package off the queue, build it and record the result."""
    status.idle = False
    packages = status.queue
    if len(packages) > 0:
        pack = status.queue.lpop()
        if pack:
            pkgobj = package.get_pkg_object(name=pack)
        else:
            return False
        rqjob = get_current_job(db)
        rqjob.meta['package'] = pkgobj.name
        rqjob.save()
        status.now_building = pkgobj.name
        if pkgobj.is_iso is True or pkgobj.is_iso == 'True':
            status.iso_building = True
            build_result = build_iso(pkgobj)
        else:
            build_result = build_pkgs(pkgobj)
        # TODO: Move this into its own method
        if build_result is not None:
            completed = status.completed
            failed = status.failed
            blds = pkgobj.builds
            total = len(blds)
            if total > 0:
                success = len([x for x in blds if x in completed])
                failure = len([x for x in blds if x in failed])
                if success > 0:
                    success = 100 * success / total
                else:
                    success = 0
                if failure > 0:
                    failure = 100 * failure / total
                else:
                    failure = 0
                pkgobj.success_rate = success
                pkgobj.failure_rate = failure
        if build_result is True:
            run_docker_clean(pkgobj.pkgname)
    if not status.queue and not status.hook_queue:
        remove('/opt/antergos-packages')
        status.idle = True
        status.building = 'Idle'
        status.now_building = 'Idle'
        status.container = ''
        status.building_num = ''
        status.building_start = ''
        status.iso_building = False
        logger.info('All builds completed.')

def stop(path, machineName, host, environment):
    new_env = resetEnv(host, environment)
    logger.debug('Bring down {}'.format(path))
    old_path = os.getcwd()
    jobid = get_current_job().id
    try:
        os.chdir(path)
        _open_console(jobid)
        _l = lambda line: _log_console(jobid, str(line))
        if machineName != '':
            sh.vagrant('halt', machineName, _ok_code=[0, 1, 2],
                       _out=_l, _err=_l, _env=new_env).wait()
        else:
            sh.vagrant('halt', _ok_code=[0, 1, 2],
                       _out=_l, _err=_l, _env=new_env).wait()
    except Exception:
        logger.error('Failed to shut down machine {}'.format(path), exc_info=True)
    _close_console(jobid)
    os.chdir(old_path)
    # logger.debug('Done bring down {}'.format(path))
    return json.dumps(_get_status(path, host, environment))

def provision(path, environment, machineName, host):
    new_env = resetEnv(host, environment)
    # logger.debug('Running provision on {} with env {}'
    #              .format(path, environment))
    old_path = os.getcwd()
    jobid = get_current_job().id
    try:
        os.chdir(path)
        _open_console(jobid)
        _l = lambda line: _log_console(jobid, str(line))
        if machineName != '':
            sh.vagrant('provision', machineName, _ok_code=[0, 1, 2],
                       _out=_l, _err=_l, _env=new_env).wait()
        else:
            sh.vagrant('provision', _ok_code=[0, 1, 2],
                       _out=_l, _err=_l, _env=new_env).wait()
    except Exception:
        logger.error('Failed to provision machine at {}'.format(path), exc_info=True)
    _close_console(jobid)
    os.chdir(old_path)
    return json.dumps(_get_status(path, host, environment))

def run_tests(payload):
    # payload = get_payload(payload_id)
    job = get_current_job()
    # work out the repo_url
    repo_name = payload['repository']['name']
    owner = payload['repository']['owner']['name']
    repo_url = "[email protected]:%s/%s.git" % (owner, repo_name)
    update_progress(job, 'repo url: %s' % repo_url)
    logger.info("repo: %s" % repo_url)
    vpath = tempfile.mkdtemp(suffix="ridonkulous")
    logger.info("cloning repo %s to: %s" % (repo_url, vpath))
    update_progress(job, "cloning repo %s to: %s" % (repo_url, vpath))
    create_environment(vpath, site_packages=False)
    os.chdir(vpath)
    git.Git().clone(repo_url)
    os.chdir(os.path.join(vpath, repo_name))
    pip = "%s/bin/pip" % vpath
    # python = "%s/bin/python"
    nose = "%s/bin/nosetests" % vpath
    ret = subprocess.call(r'%s install -r requirements.txt --use-mirrors' % pip, shell=True)
    logger.info("running nose")
    ret = subprocess.call(r'%s' % nose, shell=True)
    logger.info(ret)
    update_progress(job, 'done')
    return 'ok'

def archive(files):
    """
    Argument expected:
    [
        {"path": "/path/to/file", "name": "file_name_in_zip"},
        { ... },
        ...
    ]
    Returns an S3 URL.
    """
    job = rq.get_current_job(get_redis())
    handle, tempname = tempfile.mkstemp()
    os.close(handle)
    with ZipFile(tempname, mode='w', compression=ZIP_DEFLATED) as zipfile:
        for file_ in files:
            zipfile.write(file_['path'], file_['name'])
    job.meta['size'] = si_unit(os.path.getsize(tempname))
    job.save()
    objname = str(uuid.uuid4()) + ".zip"
    s3 = boto3.resource('s3')
    s3.Bucket(bucket).upload_file(tempname, objname,
                                  ExtraArgs={'ContentType': 'application/zip'})
    os.remove(tempname)
    url = "https://%s.s3.amazonaws.com/%s" % (bucket, objname)
    return url

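# --- Usage sketch (assumptions, not from the source): enqueueing archive() and
# reporting the zip size stored in job.meta while waiting for the S3 URL result.
# Queue name, Redis location and the file list are made up for illustration.
import time
from redis import Redis
from rq import Queue

queue = Queue('archives', connection=Redis())
job = queue.enqueue(archive, [
    {'path': '/tmp/report.pdf', 'name': 'report.pdf'},
    {'path': '/tmp/data.csv', 'name': 'data.csv'},
])
while job.result is None and not job.is_failed:
    job.refresh()
    print('zip size so far:', job.meta.get('size'))
    time.sleep(1)
print('download url:', job.result)
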
def rsync(path, host, environment, machineName=None):
    new_env = resetEnv(host, environment)
    old_path = os.getcwd()
    os.chdir(path)
    try:
        jobid = get_current_job().id
        _open_console(jobid)
        _log_console(
            jobid, 'Running rsync on machine {}.\n'.format(machineName)
        )
        _l = lambda line: _log_console(jobid, str(line))
        if machineName is not None:
            sh.vagrant('rsync', machineName, _out=_l, _err=_l,
                       _ok_code=[0, 1, 2], _env=new_env).wait()
        else:
            sh.vagrant('rsync', _out=_l, _err=_l,
                       _ok_code=[0, 1, 2], _env=new_env).wait()
        _log_console(
            jobid, 'rsync is done running on machine {}.\n'.format(machineName))
        _close_console(jobid)
    except Exception:
        return json.dumps({'msg': 'error trying to run vagrant rsync'})
    os.chdir(old_path)
    return json.dumps({'msg': 'rsync done'})

def _employees(self, company_name="", keyword=""):
    '''Linkedin Scrape'''
    # TODO - add linkedin directory search
    args = '-inurl:"/dir/" -inurl:"/find/" -inurl:"/updates"'
    args = args + ' -inurl:"job" -inurl:"jobs2" -inurl:"company"'
    qry = '"at {0}" {1} {2} site:linkedin.com'
    qry = qry.format(company_name, args, keyword)
    results = Google().search(qry, 10)
    results = results.dropna()
    results = Google()._google_df_to_linkedin_df(results)
    _name = '(?i){0}'.format(company_name)
    if " " in company_name:
        results['company_score'] = [fuzz.partial_ratio(_name, company)
                                    for company in results.company]
    else:
        results['company_score'] = [fuzz.ratio(_name, company)
                                    for company in results.company]
    if keyword != "":
        results['score'] = [fuzz.ratio(keyword, title) for title in results.title]
        results = results[results.score > 75]
    results = results[results.company_score > 64]
    results = results.drop_duplicates()
    data = {'data': results.to_dict('r'), 'company_name': company_name}
    CompanyExtraInfoCrawl()._persist(data, "employees", "")
    job = rq.get_current_job()
    print(job.meta.keys())
    if "queue_name" in job.meta.keys():
        if RQueue()._has_completed(job.meta["queue_name"]):
            q.enqueue(Jigsaw()._upload_csv, job.meta["company_name"])
    return results

def update_repo(self):
    with Connection(db):
        current_job = get_current_job()
        if 'update_repo' != current_job.origin:
            logger.error('Only the repo worker can update repos!')
            return
        trans_running = status.transactions_running or status.transaction_queue
        building_saved = False
        excluded = [
            'Updating antergos repo database.',
            'Updating antergos-staging repo database.',
            'Processing developer review result.',
            'Checking remote package sources for changes.',
        ]
        if not status.idle and trans_running and status.current_status not in excluded:
            building_saved = status.current_status
        elif status.idle:
            status.idle = False
        msg = excluded[0] if 'antergos' == self.name else excluded[1]
        status.current_status = msg
        self._update_repo()
        trans_running = status.transactions_running or status.transaction_queue
        if building_saved and not status.idle and status.current_status == msg:
            status.current_status = building_saved
        elif status.idle or not trans_running:
            status.idle = True
            status.current_status = 'Idle.'

def get_csv_rows(self, queryset, type, model):
    data = [self.get_csv_header()]
    total = queryset.count()
    processed = 0
    job = get_current_job()
    for asset in queryset:
        row = ['part', ] if asset.part_info else ['device', ]
        for item in self.columns:
            field = item.field
            if field:
                nested_field_name = item.foreign_field_name
                if nested_field_name == type:
                    cell = self.get_cell(
                        getattr(asset, type), field, model
                    )
                elif nested_field_name == 'part_info':
                    cell = self.get_cell(asset.part_info, field, PartInfo)
                elif nested_field_name == 'venture':
                    cell = self.get_cell(asset.venture, field, Venture)
                else:
                    cell = self.get_cell(asset, field, Asset)
                row.append(unicode(cell))
        data.append(row)
        processed += 1
        if job:
            job.meta['progress'] = processed / total
            if not job.meta['start_progress']:
                job.meta['start_progress'] = datetime.datetime.now()
            job.save()
    if job:
        job.meta['progress'] = 1
        job.save()
    return data

def save_assembly_job(assembly, fasta_path, calculate_fourmers, search_genes,
                      email=None, coverage_filename=None, bulk_size=5000):
    job = get_current_job()
    # Find essential genes
    essential_genes = None
    if search_genes:
        job.meta['status'] = 'Searching for essential genes per contig'
        job.save()
        essential_genes = find_essential_genes_per_contig(fasta_path)
    # Save contigs to database
    job.meta['status'] = 'Saving contigs'
    job.save()
    args = [assembly, fasta_path, calculate_fourmers, essential_genes, bulk_size]
    if coverage_filename is not None:
        samples, coverages = read_coverages(coverage_filename)
        args.append(coverages)
        assembly.samples = ','.join(samples)
    notfound = save_contigs(*args)
    job.meta['notfound'].extend(notfound)
    job.save()
    assembly.busy = False
    db.session.add(assembly)
    db.session.commit()
    if email:
        utils.send_completion_email(email, assembly.name)
    return {'assembly': assembly.id}

def process_document(path, options, meta):
    current_task = get_current_job()
    with Office(app.config["LIBREOFFICE_PATH"]) as office:  # acquire libreoffice lock
        with office.documentLoad(path) as original_document:  # open original document
            with TemporaryDirectory() as tmp_dir:  # create temp dir where output'll be stored
                for fmt in options["formats"]:  # iterate over requested formats
                    current_format = app.config["SUPPORTED_FORMATS"][fmt]
                    output_path = os.path.join(tmp_dir, current_format["path"])
                    original_document.saveAs(output_path, fmt=current_format["fmt"])
                if options.get("thumbnails", None):
                    is_created = False
                    if meta["mimetype"] == "application/pdf":
                        pdf_path = path
                    elif "pdf" in options["formats"]:
                        pdf_path = os.path.join(tmp_dir, "pdf")
                    else:
                        pdf_tmp_file = NamedTemporaryFile()
                        pdf_path = pdf_tmp_file.name
                        original_document.saveAs(pdf_tmp_file.name, fmt="pdf")
                        is_created = True
                    image = Image(filename=pdf_path,
                                  resolution=app.config["THUMBNAILS_DPI"])
                    if is_created:
                        pdf_tmp_file.close()
                    thumbnails = make_thumbnails(image, tmp_dir, options["thumbnails"]["size"])
                result_path, result_url = make_zip_archive(current_task.id, tmp_dir)
                remove_file.schedule(
                    datetime.timedelta(seconds=app.config["RESULT_FILE_TTL"]),
                    result_path
                )
                return result_url

def get_csv_rows(self, queryset, type, model):
    data = [self.get_csv_header()]
    total = queryset.count()
    processed = 0
    job = get_current_job()
    for asset in queryset:
        row = ['part'] if asset.part_info else ['device']
        for item in self.columns:
            field = item.field
            if field:
                nested_field_name = item.foreign_field_name
                if nested_field_name == type:
                    cell = self.get_cell(
                        getattr(asset, type), field, model
                    )
                elif nested_field_name == 'part_info':
                    cell = self.get_cell(asset.part_info, field, PartInfo)
                elif nested_field_name == 'venture':
                    cell = self.get_cell(asset.venture, field, Venture)
                elif nested_field_name == 'is_discovered':
                    cell = unicode(asset.is_discovered)
                else:
                    cell = self.get_cell(asset, field, Asset)
                row.append(unicode(cell))
        data.append(row)
        processed += 1
        set_progress(job, processed / total)
    set_progress(job, 1)
    return data

def walk(client, metadata, bytes_read, total_bytes):
    job = get_current_job()
    dir_path = os.path.basename(metadata['path'])
    bytes = metadata['bytes']
    bytes_read += int(bytes)
    update_progress(job, float(bytes_read) / total_bytes, dir_path)
    result = {'name': os.path.basename(dir_path), 'children': [], 'value': bytes}
    if 'contents' in metadata:
        for dir_entry in metadata['contents']:
            path = dir_entry['path']
            # Skip hidden files, shit gets too rowdy
            if os.path.basename(path)[0] == '.':
                continue
            dir_entry_bytes = dir_entry['bytes']
            bytes_read += int(dir_entry_bytes)
            update_progress(job, float(bytes_read) / total_bytes, path)
            if dir_entry_bytes == 0:
                child, bytes_read = walk(client, get_metadata(client, path),
                                         bytes_read, total_bytes)
            else:
                child = {'name': os.path.basename(path), 'value': dir_entry_bytes}
            result['children'].append(child)
    # empty directories? do we care?
    if len(result['children']) == 0:
        _ = result.pop('children', None)
    return result, bytes_read

def scan_address_job(
    ip_address=None,
    plugins=None,
    results=None,
    automerge=AUTOMERGE_MODE,
    called_from_ui=False,
    **kwargs
):
    """The function that is actually running on the worker."""
    job = rq.get_current_job()
    available_plugins = getattr(settings, 'SCAN_PLUGINS', {}).keys()
    if not plugins:
        plugins = available_plugins
    run_postprocessing = not (set(available_plugins) - set(plugins))
    if ip_address and plugins:
        if not kwargs:
            ip, created = IPAddress.concurrent_get_or_create(
                address=ip_address,
            )
            if not (ip.snmp_name and ip.snmp_community):
                message = "SNMP name/community is missing. Forcing autoscan."
                job.meta['messages'] = [
                    (ip_address, 'ralph.scan', 'info', message)
                ]
                job.save()
                autoscan_address(ip_address)
            kwargs = {
                'snmp_community': ip.snmp_community,
                'snmp_version': ip.snmp_version,
                'http_family': ip.http_family,
                'snmp_name': ip.snmp_name,
            }
        results = _run_plugins(ip_address, plugins, job, **kwargs)
        if run_postprocessing:
            _scan_postprocessing(results, job, ip_address)
            if automerge and job.meta.get('changed', True):
                # Run only when automerge mode is enabled and some change was
                # detected. When `change` state is not available just run it...
                save_job_results(job.id)
            elif not called_from_ui and job.args and job.meta.get('changed', True):
                # Run only when some change was detected. When `change` state is
                # not available just run it...
                try:
                    ip_obj = IPAddress.objects.select_related().get(
                        address=job.args[0]  # job.args[0] == ip_address
                    )
                except IPAddress.DoesNotExist:
                    pass
                else:
                    for plugin_name in getattr(
                        settings, 'SCAN_POSTPROCESS_ENABLED_JOBS', []
                    ):
                        try:
                            module = import_module(plugin_name)
                        except ImportError as e:
                            logger.error(unicode(e))
                        else:
                            module.run_job(ip_obj)
    return results

def _scan_address(address, plugins, **kwargs):
    """The function that is actually running on the worker."""
    job = rq.get_current_job()
    results = {}
    job.meta['messages'] = []
    job.meta['finished'] = []
    job.meta['status'] = {}
    for plugin_name in plugins:
        message = "Running plugin %s." % plugin_name
        job.meta['messages'].append((address, plugin_name, 'info', message))
        job.save()
        try:
            module = import_module(plugin_name)
        except ImportError as e:
            message = 'Failed to import: %s.' % e
            job.meta['messages'].append((address, plugin_name, 'error', message))
            job.meta['status'][plugin_name] = 'error'
        else:
            result = module.scan_address(address, **kwargs)
            results[plugin_name] = result
            for message in result.get('messages', []):
                job.meta['messages'].append((address, plugin_name, 'warning', message))
            job.meta['status'][plugin_name] = result.get('status', 'success')
        job.meta['finished'].append(plugin_name)
        job.save()
    return results

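# --- Monitoring sketch (queue name, address and plugin list are assumptions; only the
# meta keys written by _scan_address above are taken from the source): shows how a UI
# could poll the per-plugin status that the job records in job.meta.
import time
from redis import Redis
from rq import Queue

queue = Queue('scan', connection=Redis())
job = queue.enqueue(_scan_address, '10.0.0.5', ['ralph.scan.plugins.snmp'])
while not (job.is_finished or job.is_failed):
    job.refresh()
    done = job.meta.get('finished', [])
    statuses = job.meta.get('status', {})
    print('finished plugins: %s, statuses: %s' % (done, statuses))
    time.sleep(2)
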
def process_and_save_build_metadata(self, version_str=None):
    """
    Initializes the build metadata.

    Args:
        pkg_obj (Package): Package object for the package being built.

    Returns:
        Build: A build object.
    """
    self.start_str = self.datetime_to_string(datetime.now())
    if version_str:
        self.version_str = version_str
    else:
        self.version_str = self._pkg_obj.version_str
    pkg_link = '<a href="/package/{0}">{0}</a>'.format(self._pkg_obj.pkgname)
    tpl = 'Build <a href="/build/{0}">{0}</a> for {1} <strong>{2}</strong> started.'
    tlmsg = tpl.format(self.bnum, pkg_link, self.version_str)
    get_timeline_object(msg=tlmsg, tl_type=3, ret=False)
    self._pkg_obj.builds.append(self.bnum)
    status.now_building.append(self.bnum)
    with Connection(self.db):
        current_job = get_current_job()
        current_job.meta['building_num'] = self.bnum
        current_job.save()

def scan_address_job(
    ip_address=None,
    plugins=None,
    results=None,
    automerge=AUTOMERGE_MODE,
    **kwargs
):
    """The function that is actually running on the worker."""
    job = rq.get_current_job()
    available_plugins = getattr(settings, 'SCAN_PLUGINS', {}).keys()
    if not plugins:
        plugins = available_plugins
    run_postprocessing = not (set(available_plugins) - set(plugins))
    if ip_address and plugins:
        if not kwargs:
            ip, created = IPAddress.concurrent_get_or_create(
                address=ip_address,
            )
            kwargs = {
                'snmp_community': ip.snmp_community,
                'snmp_version': ip.snmp_version,
                'http_family': ip.http_family,
                'snmp_name': ip.snmp_name,
            }
        results = _run_plugins(ip_address, plugins, job, **kwargs)
        if run_postprocessing:
            _scan_postprocessing(results, job, ip_address)
            # Run only when automerge mode is enabled and some change was detected.
            # When `change` state is not available just run it...
            if automerge and job.meta.get('changed', True):
                save_job_results(job.id)
    return results

def test(self, company_name):
    job = rq.get_current_job()
    print(job.meta.keys())
    if "queue_name" in job.meta.keys():
        print(RQueue()._has_completed(job.meta["queue_name"]))
        print(RQueue()._has_completed("queue_name"))
        if RQueue()._has_completed(job.meta["queue_name"]):
            q.enqueue(Jigsaw()._upload_csv, job.meta["company_name"])

def nhmmer_search(sequence, description):
    """RQ worker function."""
    job = get_current_job()
    save_query(sequence, job.id, description)
    filename = NhmmerSearch(sequence=sequence, job_id=job.id)()
    save_results(filename, job.id)

def create_container(d_os):
    num = d_os['num_instance']
    if d_os['ct_type'] == "docker" and already_running(d_os['username'], d_os['code']):
        print("Ignore ct_create request")
        return
    print(" -->>Running for user " + str(d_os['username']) + " with ct_type " +
          str(d_os['ct_type']) + " uptime is: " + str(d_os['container_uptime']))
    cur_job = get_current_job()
    cur_job.meta['ownername'] = str(d_os['username'])
    cur_job.save()
    cur_job.refresh()
    while num > 0:
        if d_os['ct_type'] == "openvz":
            cmd = "vzctl create " + d_os['cid'] + " --ostemplate " + d_os['ostemplate']
        elif d_os['ct_type'] == "aws_vm":
            ec2_conn = spoty.ec2_connect()
            # Read distro specific config file.
            config_entry = spoty.read_conf_file(d_os['repo'])
            cur_job.meta['request_status'] = "Reading config files"
            cur_job.save()
            cur_job.refresh()
            spot, bdm = spoty.req_instance_and_tag(ec2_conn, config_entry)
            cur_job.meta['request_status'] = "Creating VM"
            cur_job.save()
            cur_job.refresh()
            instance = spoty.set_bdm(spot, bdm, ec2_conn, config_entry)
            cur_job.meta['request_status'] = "Booting VM"
            cur_job.save()
            cur_job.refresh()
            # Push the handles into d_os for the follow-up jobs.
            d_os['instance'] = instance
            d_os['ec2_conn'] = ec2_conn
            cmd = "uname -a"
        else:
            d_os['repo_vers'] = '2'
            if d_os['code'] == 1:
                d_os['repo_vers'] = '3'
                d_os['container_uptime'] = 3600
            cmd = ("docker run --user wmuser --name " + d_os['username'] + str(d_os['code']) +
                   ' ' + d_os['options'] + d_os['port'] + d_os['repo'] +
                   d_os['repo_vers'] + d_os['ct_cmd'])
        print("Starting..")
        print(cmd)
        out = check_output(shlex.split(cmd))
        print("Output is:")
        print(out)
        d_os['imgid'] = out.rstrip()
        num -= 1
    if d_os['code'] == 1:
        programmingsite.movedata_host2ct(d_os)
    if d_os['proceed_nextq']:
        with Connection(Redis()):
            q = Queue('setupq', default_timeout=15000)
            job = q.enqueue_call(func=setup_container, args=(d_os,), result_ttl=600)
            cur_job.meta['request_status'] = "Install Software"
            cur_job.meta['setupq_jobid'] = job.id
            cur_job.save()
            cur_job.refresh()
    print(cur_job.meta)

def unregister_dirty(self, decrement=1):
    """Unregister current TreeItem as dirty
    (should be called from RQ job procedure after cache is updated)
    """
    r_con = get_connection()
    job = get_current_job()
    logger.debug('UNREGISTER %s (-%s) where job_id=%s' %
                 (self.get_cachekey(), decrement, job.id))
    r_con.zincrby(POOTLE_DIRTY_TREEITEMS, self.get_cachekey(), 0 - decrement)

def send_message(**params):
    """
    Tries to send the message with specified parameters & number of retries

    Args:
        to (list) - List of emails to send the message to
        from_email (str) - Email to send the message on behalf of
        subject (str) - Subject of the message
        text (str) - Main text that should go in the body of the message
        cc (list) - Optional; list of emails to send the message to, with the 'cc' header
        bcc (list) - Optional; list of emails to send the message to, with the 'bcc' header
        retries (int) - Optional; number of times each Mailer implementation should try
            to send the message

    All email fields are as specified in RFC-822
    """
    retries = params.get('retries', 1)  # By default retry 1 time
    # TODO: Random shuffling is a crude load-balancing method. Ideally we may want to
    # consider the number of requests to send message made to each Mailer and route new
    # requests accordingly.
    mailers = get_available_mailers()
    shuffle(mailers)
    # TODO: Check if rq has any inbuilt retry mechanism that can be leveraged
    while retries >= 0:
        for mailer in mailers:
            try:
                messages_info = mailer.send_message(**params)
                job = get_current_job()
                job.meta['handled_by'] = mailer.__class__.__name__
                job.meta['messages_info'] = messages_info
                job.save()
                # TODO: Use a better way to store status info & metadata for it
                return
            except MailNotSentException as e:
                # TODO: Use logging here to log details of why this mail wasn't sent
                # using e.message & e.status_code. Also, add more details to
                # MailNotSentException if required
                pass
            except ConnectTimeout as e:
                # TODO: log
                pass
            # Catch other Exceptions that can be thrown here
            except Exception as e:
                # If the send_message method fails for any reason whatsoever, we want
                # to use the next Mailer.
                # TODO: Log. These logs will be very important as they'll let us know
                # about failures we're not anticipating
                pass
        retries = retries - 1

def _set_task_progress(task):
    """
    This method will update the job progress using the task object

    :param task: Task
    :return:
    """
    job = get_current_job()
    if job:
        job.meta['progress'] = task.export()
        job.save_meta()

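# --- Sketch of the other half of this pattern (assumed, not from the source): a worker
# task that reports progress through _set_task_progress, plus a client that reads it
# back. DemoTask is a stand-in for whatever Task object export() belongs to.
import time
from redis import Redis
from rq import Queue

class DemoTask:
    def __init__(self):
        self.done = 0
    def export(self):
        return {'done': self.done}

def export_with_progress():
    task = DemoTask()
    for step in range(5):
        task.done = step + 1          # ... one unit of work ...
        _set_task_progress(task)      # persists task.export() into job.meta

queue = Queue(connection=Redis())
job = queue.enqueue(export_with_progress)
time.sleep(2)                         # give a worker a moment to pick it up
job.refresh()
print(job.meta.get('progress'))       # e.g. {'done': 5}
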
def unregister_all_dirty(self, decrement=1):
    """Unregister current TreeItem and all parent paths as dirty
    (should be called from RQ job procedure after cache is updated)
    """
    r_con = get_connection()
    job = get_current_job()
    for p in self.all_pootle_paths():
        logger.debug('UNREGISTER %s (-%s) where job_id=%s' % (p, decrement, job.id))
        r_con.zincrby(POOTLE_DIRTY_TREEITEMS, p, 0 - decrement)

def create_zim(settings, options):
    """Call the zim creator and the mailer when it is finished."""
    job = get_current_job()
    log_dir = settings.get('zimit.logdir', '/tmp')
    log_file = os.path.join(log_dir, "%s.log" % job.id)
    zim_creator = load_from_settings(settings, log_file)
    zim_file = zim_creator.create_zim_from_website(options['url'], options)
    output_url = settings.get('zimit.output_url')
    zim_url = urlparse.urljoin(output_url, zim_file)
    send_zim_url(settings, options['email'], zim_url)

def analyze_user(user_id=None, user_tag=None, max_friends=20):
    try:
        userm = User.query.get(user_id)
        steps = 5
        # GET MAIN USER
        current_step = 1
        _set_task_progress(current_step, 'Obteniendo informacion de {}'.format(user_tag))
        app.logger.info("Solicitando analisis de {}".format(user_tag))
        user = get_user(user_name=user_tag)
        # GET FRIENDS IDS
        _set_task_progress(int(current_step * 100 / steps), 'Obteniendo lista de amigos')
        friends_ids = get_user_friends(user_name=user['screen_name'])
        current_step += 1
        # Fixed steps + number of friends to fetch + max_friends (if more than 20) to
        # analyze. For each friend we fetch tweets, derive a personality profile and
        # compare it against the user with the distance metric.
        friends_n = len(friends_ids)
        if friends_n < 20:
            steps += friends_n + (friends_n * 3)
        else:
            steps += friends_n + (max_friends * 3)
        current_step += 1
        # GET USER TWEETS LIST
        _set_task_progress(int(current_step * 100 / steps),
                           'Obteniendo tweets de {}'.format(user['user_name']))
        user_tweets = get_user_tweets(user_name=user['screen_name'])
        current_step += 1
        # GET USER PERSONALITIES
        _set_task_progress(
            int(current_step * 100 / steps),
            'Obteniendo perfil de personalidad de {}'.format(user['user_name']))
        user_personality = get_user_personality(user_tweets)
        current_step += 1
        friends_list = list()
        for friend in friends_ids:
            # GET SPECIFIC FRIEND
            f = get_user(user_id=friend)  # user dict
            _set_task_progress(
                int(current_step * 100 / steps),
                'Obteniendo informacion de {}'.format(f['user_name']))
            friends_list.append(f)
            current_step += 1
        # SORT LIST OF FRIENDS BY POST COUNT
        friends_list = sorted(friends_list, key=lambda k: k['user_tweets'], reverse=True)
        # ONLY MOST RELEVANT MAX_FRIENDS
        friends_list = friends_list[:max_friends]
        # GET FRIENDS INFO
        friends_data = list()
        for friend in friends_list:
            # GET FRIEND TWEETS
            _set_task_progress(
                int(current_step * 100 / steps),
                'Obteniendo tweets de {}'.format(friend['user_name']))
            friend_tweets = get_user_tweets(user_id=friend['user_id'])
            current_step += 1
            # GET FRIEND PERSONALITY
            _set_task_progress(
                int(current_step * 100 / steps),
                'Obteniendo perfil de personalidad de {}'.format(friend['user_name']))
            friend_personality = get_user_personality(friend_tweets)
            friend_s = {
                'name': friend['screen_name'],
                'personality': friend_personality,
                'tweets_count': friend['user_tweets'],
                'avatar': friend['avatar']
            }
            friends_data.append(friend_s)
            current_step += 1
        # OBTAIN DISTANCE METRICS
        for friend in friends_data:
            _set_task_progress(
                int(current_step * 100 / steps),
                'Comparando {} con {}'.format(user['user_name'], friend['name']))
            distance = manhattan_distance(user_personality, friend['personality'])
            friend['distance'] = distance
            current_step += 1
        # SORT FRIENDS LIST ACCORDING TO DISTANCE
        friends_data = sorted(friends_data, key=lambda k: k['distance'])
        _set_task_progress(100, 'Analisis completo')
        user_analysis = {
            'user_name': user['user_name'],
            'user_screen_name': user['screen_name'],
            'user_personality': user_personality,
            'user_tweets_count': user['user_tweets'],
            'user_friends_count': user['user_friends'],
            'user_avatar': user['avatar'],
            'user': 1,
            'friends': friends_data
        }
        job = get_current_job()
        result = Result(id=job.get_id(), result=json.dumps(user_analysis))
        db.session.add(result)
        db.session.commit()
        # SEND RESULTS EMAIL
        send_email('[ourPersonalities] Análisis completado',
                   sender=app.config['LYRADMIN'][0],
                   recipients=[userm.email],
                   text_body=render_template('email/task_completed.txt',
                                             user=userm, task=job.get_id()),
                   html_body=render_template('email/task_completed.html',
                                             user=userm, task=job.get_id()))
        # Sleeping 'til api request cools down
        time.sleep(60 * 15)
    except Exception:
        _set_task_progress(500, 'Analisis fallido')
        app.logger.error('Unhandled exception', exc_info=sys.exc_info())
        print("error")

def convert(instance, input_file, output_folder, priority, encoding_profile):
    errors = []
    profiles = {
        "240p": {"width": 426, "vb": 300, "mb": 300, "bs": 600},
        "360p": {"width": 640, "vb": 350, "mb": 350, "bs": 700},
        "480p": {"width": 854, "vb": 500, "mb": 500, "bs": 1000},
        "720p": {"width": 1280, "vb": 1000, "mb": 1000, "bs": 2000},
        "1080p": {"width": 1920, "vb": 2000, "mb": 2000, "bs": 4000},
    }
    job = get_current_job()
    print("Current job: {}".format(job.id))
    # Points to 'EdulearnNetUpload' folder
    vidcon_root = app.config['VIDCON_ROOT']
    # Check if directory is mounted properly
    if os.path.exists(vidcon_root):
        if len(os.listdir(vidcon_root)) == 0:
            # TODO: mount sequence for OSX
            subprocess.call(["sudo", "mount", "/media/edulearnupload/"])
    # E.g /Volumes/EdulearnNETUpload/asknlearn/vidcon/input/small_Sample.mp4
    input_file_absolute_path = os.path.join(vidcon_root + instance + '/' + input_file)
    # E.g /Volumes/EdulearnNETUpload/asknlearn/vidcon/output/
    output_folder_absolute_path = os.path.join(vidcon_root + instance + '/' + output_folder)
    # E.g small_Sample.mp4, derived from the given input filename
    output_filename = os.path.split(input_file)[1]
    # Split a filename into its name and extension:
    # E.g output_file = small_Sample, output_file_extension = .mp4
    output_file, output_file_extension = os.path.splitext(output_filename)
    # Force extension to be ".mp4"
    output_file_extension = ".mp4"
    # Rename the file to include the profile that it was encoded under
    # E.g small_Sample_240p.mp4
    new_output_filename = output_file + "_" + encoding_profile + output_file_extension
    new_output_file = os.path.join(output_folder_absolute_path + "/" + new_output_filename)
    profile = profiles[encoding_profile]
    ffmpeg_cmd = """ ffmpeg -i '{0}' -codec:v libx264 -profile:v high -preset slow -b:v {1}k -maxrate {2}k -bufsize {3}k -vf scale={4}:trunc(ow/a/2)*2 -threads 0 -codec:a mp3 -b:a 64k -y '{5}'""".format(
        input_file_absolute_path,
        str(profile['vb']),
        str(profile['mb']),
        str(profile['bs']),
        str(profile['width']),
        new_output_file)
    std_err = ""
    std_in = ""
    output = ""
    try:
        output = subprocess.check_output(shlex.split(ffmpeg_cmd))
        print("Success: {}".format(output))
    except subprocess.CalledProcessError as ex:
        # Re-run the command to capture stdout/stderr for the error report.
        p = subprocess.Popen(shlex.split(ffmpeg_cmd), bufsize=2048,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        std_in, std_err = map(
            lambda b: b.decode('utf-8').replace(os.linesep, '\n'),
            p.communicate((os.linesep).encode('utf-8')))
        print("std_in: {}".format(std_in))
        print("std_err: {}".format(std_err))
        raise GazzaThinksItFailedError(
            "\n\nreturn_code: \n{}\n\nffmpeg_cmd: \n{}\n\noutput: \n{}\n\n".format(
                ex.returncode, ffmpeg_cmd.strip(), std_err.strip()))
    finally:
        print("{}".format(input_file_absolute_path))
        print("{}".format(new_output_file))

def job_message(message):
    job = get_current_job(connection=django_rq.get_connection())
    if not job.meta.get('messages'):
        job.meta['messages'] = deque()
    job.meta['messages'].append(message)
    job.save()

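# --- Reader sketch (an assumption, not part of the source): draining the messages that
# job_message() accumulates in job.meta. With RQ's default pickle serializer the deque
# round-trips as a deque when the job is refetched.
from collections import deque
from redis import Redis
from rq.job import Job

def read_job_messages(job_id, connection=None):
    """Return the messages recorded so far for the given RQ job id."""
    job = Job.fetch(job_id, connection=connection or Redis())
    return list(job.meta.get('messages', deque()))
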
def update_background(course_id, extension_dict):
    """
    Update time on selected students' quizzes to a specified percentage.

    :param course_id: The Canvas ID of the Course to update in
    :type course_id: int
    :param extension_dict: A dictionary that includes the percent of
        time and a list of canvas user ids.

        Example:
        {
            'percent': '300',
            'user_ids': ['0123456', '1234567', '9867543', '5555555']
        }
    :type extension_dict: dict
    """
    job = get_current_job()
    update_job(job, 0, "Starting...", "started")
    with app.app_context():
        if not extension_dict:
            update_job(job, 0, "Invalid Request", "failed", error=True)
            logger.warning("Invalid Request: {}".format(extension_dict))
            return job.meta
        try:
            course_json = get_course(course_id)
        except requests.exceptions.HTTPError:
            update_job(job, 0, "Course not found.", "failed", error=True)
            logger.exception("Unable to find course #{}".format(course_id))
            return job.meta
        course_name = course_json.get("name", "<UNNAMED COURSE>")
        user_ids = extension_dict.get("user_ids", [])
        percent = extension_dict.get("percent", None)
        if not percent:
            update_job(job, 0, "`percent` field required.", "failed", error=True)
            logger.warning("Percent field not provided. Request: {}".format(extension_dict))
            return job.meta
        course, created = get_or_create(db.session, Course, canvas_id=course_id)
        course.course_name = course_name
        db.session.commit()
        for user_id in user_ids:
            try:
                canvas_user = get_user(course_id, user_id)
                sortable_name = canvas_user.get("sortable_name", "<MISSING NAME>")
                sis_id = canvas_user.get("sis_user_id")
            except requests.exceptions.HTTPError:
                # Unable to find user. Log and skip them.
                logger.warning("Unable to find user #{} in course #{}".format(
                    user_id, course_id))
                continue
            user, created = get_or_create(db.session, User, canvas_id=user_id)
            user.sortable_name = sortable_name
            user.sis_id = sis_id
            db.session.commit()
            # create/update extension
            extension, created = get_or_create(db.session, Extension,
                                               course_id=course.id, user_id=user.id)
            extension.percent = percent
            db.session.commit()
        quizzes = get_quizzes(course_id)
        num_quizzes = len(quizzes)
        quiz_time_list = []
        unchanged_quiz_time_list = []
        if num_quizzes < 1:
            update_job(
                job, 0,
                "Sorry, there are no quizzes for this course.",
                "failed",
                error=True,
            )
            logger.warning(
                "No quizzes found for course {}. Unable to update.".format(course_id))
            return job.meta
        for index, quiz in enumerate(quizzes):
            quiz_id = quiz.get("id", None)
            quiz_title = quiz.get("title", "[UNTITLED QUIZ]")
            comp_perc = int(((float(index)) / float(num_quizzes)) * 100)
            updating_str = "Updating quiz #{} - {} [{} of {}]"
            update_job(
                job, comp_perc,
                updating_str.format(quiz_id, quiz_title, index + 1, num_quizzes),
                "processing",
                error=False,
            )
            extension_response = extend_quiz(course_id, quiz, percent, user_ids)
            if extension_response.get("success", False) is True:
                # add/update quiz
                quiz_obj, created = get_or_create(db.session, Quiz,
                                                  canvas_id=quiz_id, course_id=course.id)
                quiz_obj.title = quiz_title
                quiz_obj.time_limit = quiz.get("time_limit")
                db.session.commit()
                added_time = extension_response.get("added_time", None)
                if added_time is not None:
                    quiz_time_list.append({
                        "title": quiz_title,
                        "added_time": added_time
                    })
                else:
                    unchanged_quiz_time_list.append({"title": quiz_title})
            else:
                update_job(
                    job, comp_perc,
                    extension_response.get("message", "An unknown error occurred."),
                    "failed",
                    error=True,
                )
                logger.error("Extension failed: {}".format(extension_response))
                return job.meta
        msg_str = (
            "Success! {} {} been updated for {} student(s) to have {}% time. "
            "{} {} no time limit and were left unchanged."
        )
        message = msg_str.format(
            len(quiz_time_list),
            "quizzes have" if len(quiz_time_list) != 1 else "quiz has",
            len(user_ids),
            percent,
            len(unchanged_quiz_time_list),
            "quizzes have" if len(unchanged_quiz_time_list) != 1 else "quiz has",
        )
        update_job(job, 100, message, "complete", error=False)
        job.meta["quiz_list"] = quiz_time_list
        job.meta["unchanged_list"] = unchanged_quiz_time_list
        job.save()
        return job.meta

def invoke_iap_analysis(analysis_id, timestamp_id, username, task_key, experiment_iap_id=None):
    """
    This method represents an RQ Job workload. It should be enqueued into the RQ Analysis
    Queue and processed by an according worker.

    Handles the invocation of data analysis in IAP on the IAP server and fetches the result
    information afterwards. The received information is then entered into the database
    accordingly.

    The experiment_id has to be either passed directly or has to be stored in the result of
    a job that this one depends on. The key under which it must be stored from the previous
    job is 'response.experiment_iap_id'.

    :param analysis_id: The ID of the :class:`~server.models.analysis_model.AnalysisModel`
    :param timestamp_id: The ID of the :class:`~server.models.timestamp_model.TimestampModel`
        instance which should be analyzed
    :param username: The username of the user invoking this job
    :param experiment_iap_id: The IAP ID of this experiment. If this is None the job will
        assume that the job it depended on has returned the experiment id in its response
        object with the key 'experiment_id'

    :return: A dict containing the 'result_id' from IAP, the used 'pipeline_id',
        'started_at' and 'finished_at' timestamps. (All nested inside the 'response' key)
    """
    print('EXECUTE ANALYSIS')
    job = get_current_job()
    log_store = get_log_store()
    task = AnalysisTask.from_key(get_redis_connection(), task_key)
    channel = get_grpc_channel()
    iap_stub = phenopipe_iap_pb2_grpc.PhenopipeIapStub(channel)
    pipe_stub = phenopipe_pb2_grpc.PhenopipeStub(channel)
    if experiment_iap_id is None:
        experiment_iap_id = job.dependency.result['response']['experiment_iap_id']
    log_store.put(job.id, 'Started Analysis Job', 0)
    task.update_message('Started Analysis Job')
    session = get_session()
    # TODO Consider DB errors
    analysis = session.query(AnalysisModel).get(analysis_id)
    started_at = datetime.utcnow()
    analysis.started_at = started_at
    session.commit()
    try:
        response = iap_stub.AnalyzeExperiment(
            phenopipe_iap_pb2.AnalyzeRequest(experiment_id=experiment_iap_id,
                                             pipeline_id=analysis.pipeline_id)
        )
        remote_job_id = response.job_id
        request = phenopipe_pb2.WatchJobRequest(
            job_id=remote_job_id
        )
        status = pipe_stub.WatchJob(request)
        for msg in status:
            log_store.put(job.id, msg.message.decode('string-escape'), msg.progress)
        response = iap_stub.FetchAnalyzeResult(
            phenopipe_pb2.FetchJobResultRequest(job_id=remote_job_id)
        )
        finished_at = datetime.utcnow()
        analysis.iap_id = response.result_id
        analysis.finished_at = finished_at
        session.commit()
        log_store.put(job.id, 'Finished Analysis Job', 100)
        task.update_message('Finished Analysis Job')
        return create_return_object(JobType.iap_analysis, timestamp_id,
                                    {'result_id': response.result_id,
                                     'started_at': started_at,
                                     'finished_at': finished_at,
                                     'pipeline_id': analysis.pipeline_id})
    except grpc.RpcError as e:
        session.delete(session.query(AnalysisModel).get(analysis.id))
        session.commit()
        log_store.put(job.id, e.details(), 0)
        task.update_message('Analysis Job Failed')
        raise

def setup_template_task(template_id, name, user, password, cores, memory):
    with app.app_context():
        job = get_current_job()
        proxmox = connect_proxmox()
        starrs = connect_starrs()
        db = connect_db()
        print("[{}] Retrieving template info for template {}.".format(name, template_id))
        template = get_template(db, template_id)
        print("[{}] Cloning template {}.".format(name, template_id))
        job.meta['status'] = 'cloning template'
        job.save_meta()
        vmid, mac = clone_vm(proxmox, template_id, name, user)
        print("[{}] Registering in STARRS.".format(name))
        job.meta['status'] = 'registering in STARRS'
        job.save_meta()
        ip = get_next_ip(starrs, app.config['STARRS_IP_RANGE'])
        register_starrs(starrs, name, app.config['STARRS_USER'], mac, ip)
        get_vm_expire(db, vmid, app.config['VM_EXPIRE_MONTHS'])
        print("[{}] Setting CPU and memory.".format(name))
        job.meta['status'] = 'setting CPU and memory'
        job.save_meta()
        vm = VM(vmid)
        vm.set_cpu(cores)
        vm.set_mem(memory)
        print("[{}] Waiting for STARRS to propagate before starting VM.".format(name))
        job.meta['status'] = 'waiting for STARRS'
        job.save_meta()
        time.sleep(90)
        print("[{}] Starting VM.".format(name))
        job.meta['status'] = 'starting VM'
        job.save_meta()
        vm.start()
        print("[{}] Waiting for VM to start before SSHing.".format(name))
        job.meta['status'] = 'waiting for VM to start'
        job.save_meta()
        time.sleep(20)
        print("[{}] Creating SSH session.".format(name))
        job.meta['status'] = 'creating SSH session'
        job.save_meta()
        client = paramiko.SSHClient()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        retry = 0
        while retry < 30:
            try:
                client.connect(ip, username=template['username'],
                               password=template['password'])
                break
            except Exception:
                retry += 1
                time.sleep(3)
        print("[{}] Running user creation commands.".format(name))
        job.meta['status'] = 'running user creation commands'
        job.save_meta()
        stdin, stdout, stderr = client.exec_command("useradd {}".format(user))
        exit_status = stdout.channel.recv_exit_status()
        root_password = gen_password(32)
        stdin, stdout, stderr = client.exec_command(
            "echo '{}' | passwd root --stdin".format(root_password))
        exit_status = stdout.channel.recv_exit_status()
        stdin, stdout, stderr = client.exec_command(
            "echo '{}' | passwd '{}' --stdin".format(password, user))
        exit_status = stdout.channel.recv_exit_status()
        stdin, stdout, stderr = client.exec_command("passwd -e '{}'".format(user))
        exit_status = stdout.channel.recv_exit_status()
        stdin, stdout, stderr = client.exec_command(
            "echo '{} ALL=(ALL:ALL) ALL' | sudo EDITOR='tee -a' visudo".format(user))
        exit_status = stdout.channel.recv_exit_status()
        client.close()
        print("[{}] Template successfully provisioned.".format(name))
        job.meta['status'] = 'completed'
        job.save_meta()

def preprocess(job):
    jb = get_current_job()
    print('Current job: %s' % (jb.id,))
    try:
        job.type = 'preprocess'
        job.init_storage()
        dt = datetime.now()
        jb.meta['job_exec_time'] = str(int(dt.timestamp() * 1000))
        job.init_temp(jb.id)
        job.init_labels()
        job.jb = jb
        job.traincoco = {
            "info": {
                "description": "COCO 2017 Dataset",
                "url": "http://cocodataset.org",
                "version": "1.0",
                "year": 2018,
                "contributor": "COCO Consortium",
                "date_created": "2017/09/01"
            },
            "licenses": [],
            "images": [],
            "categories": [],
            "annotations": [],
        }
        job.testcoco = {
            "info": {
                "description": "COCO 2017 Dataset",
                "url": "http://cocodataset.org",
                "version": "1.0",
                "year": 2018,
                "contributor": "COCO Consortium",
                "date_created": "2017/09/01"
            },
            "licenses": [],
            "images": [],
            "categories": [],
            "annotations": [],
        }
        if hasattr(job, 'aug') and job.aug:
            jb.meta['steps'] = 6
        else:
            jb.meta['steps'] = 5
        jb.meta['current_step_processed'] = 0
        jb.meta['current_step_name'] = 'prep_existing_images'
        jb.meta['current_step'] = 0
        jb.save_meta()
        process_json(job)
        jb.meta['current_step_processed'] = 0
        jb.meta['current_step_size'] = 1
        jb.meta['current_step_name'] = 'create_tag_lables'
        jb.meta['current_step'] += 1
        create_label_pbtxt(job)
        jb.meta['current_step_size'] = 0
        jb.meta['current_step_name'] = 'create_training_corpus'
        jb.meta['current_step'] += 1
        jb.save_meta()
        create_tf_example(job)
        jb.meta['current_step_size'] = 0
        jb.meta['current_step_name'] = 'create_testing_corpus'
        jb.meta['current_step'] += 1
        jb.save_meta()
        create_tf_example(job, False)
        jb.meta['current_step_size'] = 0
        jb.meta['current_step_name'] = 'cleaning_up'
        jb.meta['current_step'] += 1
        jb.save_meta()
        delete_staged(job)
        upload_metadata(job)
        jb.meta['current_step_size'] = 0
        jb.meta['current_step_name'] = 'done'
        jb.meta['current_step'] += 1
        dt = datetime.now()
        jb.meta['job_end_time'] = str(int(dt.timestamp() * 1000))
        jb.save_meta()
        job.upload_data(job.to_json_string(),
                        'jobs/finished/{}_{}_preprocess_d_{}.json'.format(
                            str(job.start_time), str(job.end_time), jb.id),
                        contentType='application/json')
        return job
    except Exception:
        var = traceback.format_exc()
        dt = datetime.now()
        job.end_time = int(dt.timestamp() * 1000)
        jb.meta['job_exception'] = var
        job.exception = var
        try:
            job.upload_data(job.to_json_string(),
                            'jobs/failed/{}_{}_preprocess_f_{}.json'.format(
                                str(job.start_time), str(job.end_time), jb.id),
                            contentType='application/json')
        except Exception:
            pass
        jb.save_meta()
        raise
    finally:
        try:
            ct = 'd'
            if hasattr(job, 'exception'):
                ct = 'f'
            job.upload_data(job.to_json_string(),
                            'jobs/all/{}_{}_preprocess_{}_{}.json'.format(
                                str(job.start_time), str(job.end_time), ct, jb.id),
                            contentType='application/json')
        except Exception:
            pass
        try:
            job.delete_cloud_file('jobs/running/{}_0_preprocess_r_{}.json'.format(
                str(job.start_time), jb.id))
        except Exception:
            pass
        try:
            job.delete_cloud_file('jobs/all/{}_0_preprocess_r_{}.json'.format(
                str(job.start_time), jb.id))
        except Exception:
            pass
        job.cleanup()

def access_self():
    return get_current_job().id

def modify_self_and_error(meta):
    j = get_current_job()
    j.meta.update(meta)
    j.save()
    return 1 / 0

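# --- Test-style sketch (assumed harness, not from the source): access_self and
# modify_self_and_error read like RQ test helpers. This shows that meta saved before
# the ZeroDivisionError is still visible on the failed job.
from redis import Redis
from rq import Queue, Worker

queue = Queue(connection=Redis())
job = queue.enqueue(modify_self_and_error, {'marker': 'value'})
Worker([queue], connection=Redis()).work(burst=True)   # process synchronously for the demo
job.refresh()
assert job.is_failed
assert job.meta.get('marker') == 'value'
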
def export_clients():
    global file_name
    try:
        job = get_current_job()
        clients = Client.query.all()
        total_clients = Client.query.count()
        # _set_task_progress(0)
        # i = 0
        file_name = 'clients' + "-" + str(time.time()) + '.xlsx'
        workbook = xlsxwriter.Workbook(Config.EXCEL_FOLDER + file_name)
        # Add a bold format to use to highlight cells.
        bold = workbook.add_format({'bold': True})
        worksheet = workbook.add_worksheet()
        date_format = workbook.add_format({'num_format': 'mmmm d yyyy'})
        # Start from the first cell. Rows and columns are zero indexed.
        row = 1
        col = 0
        worksheet.write('A1', 'Name', bold)
        worksheet.write('B1', 'Email', bold)
        worksheet.write('C1', 'Contact', bold)
        worksheet.write('D1', 'Building', bold)
        worksheet.write('E1', 'House number', bold)
        worksheet.write('F1', 'Package', bold)
        worksheet.write('G1', 'Last payment date', bold)
        worksheet.write('H1', 'Due date', bold)
        worksheet.set_column('A:B', 30)
        worksheet.set_column('B:C', 30)
        worksheet.set_column('C:D', 30)
        worksheet.set_column('D:E', 30)
        worksheet.set_column('E:F', 30)
        worksheet.set_column('F:G', 20)
        worksheet.set_column('G:H', 30)
        worksheet.set_column('H:I', 30)
        for client in clients:
            service = Service.query.filter_by(client_id=client.id).first()
            payment = Payment.query.filter_by(client_id=client.id).last()
            name = client.client_user.fullname()
            email = client.client_user.email
            contact = client.client_user.phone
            building = client.building
            house = client.house
            if service:
                package = service.service_tariff.name
            else:
                package = None
            worksheet.write(row, col, name)
            worksheet.write(row, col + 1, email)
            worksheet.write(row, col + 2, contact)
            worksheet.write(row, col + 3, building)
            worksheet.write(row, col + 4, house)
            worksheet.write(row, col + 5, package)
            if payment:
                last_payment = payment.date
                due_date = last_payment + relativedelta(months=+1, days=-1)
                worksheet.write_datetime(row, col + 6, last_payment, date_format)
                worksheet.write_datetime(row, col + 7, due_date, date_format)
            else:
                last_payment = None
                due_date = None
                worksheet.write(row, col + 6, last_payment)
                worksheet.write(row, col + 7, due_date)
            row += 1
        workbook.close()
        time.sleep(5)
        # i += 1
        task = Task.query.get(job.get_id())
        task.complete = True
        db.session.commit()
        download = Download.query.filter_by(task_id=job.get_id()).first()
        if download:
            name = 'Data of ' + str(total_clients) + ' clients in XLS'
            download.name = name
            download.path = file_name
            download.generated = datetime.now()
            download.status = 1
            db.session.commit()
    except Exception:
        app.logger.error('Unhandled exception', exc_info=sys.exc_info())

def make_schedule(
    asset_id: int,
    start: datetime,
    end: datetime,
    belief_time: datetime,
    resolution: timedelta,
    soc_at_start: Optional[float] = None,
    soc_targets: Optional[pd.Series] = None,
) -> bool:
    """Preferably, a starting soc is given.
    Otherwise, we try to retrieve the current state of charge from the asset (if that is
    the valid one at the start).
    Otherwise, we set the starting soc to 0 (some assets don't use the concept of a state
    of charge, and without soc targets and limits the starting soc doesn't matter).
    """
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()
    rq_job = get_current_job()
    # find asset
    asset = Asset.query.filter_by(id=asset_id).one_or_none()
    click.echo(
        "Running Scheduling Job %s: %s, from %s to %s" % (rq_job.id, asset, start, end)
    )
    if soc_at_start is None:
        if start == asset.soc_datetime and asset.soc_in_mwh is not None:
            soc_at_start = asset.soc_in_mwh
        else:
            soc_at_start = 0
    if soc_targets is None:
        soc_targets = pd.Series(
            np.nan, index=pd.date_range(start, end, freq=resolution, closed="right")
        )
    if asset.asset_type_name == "battery":
        consumption_schedule = schedule_battery(
            asset, asset.market, start, end, resolution, soc_at_start, soc_targets
        )
    elif asset.asset_type_name in (
        "one-way_evse",
        "two-way_evse",
    ):
        consumption_schedule = schedule_charging_station(
            asset, asset.market, start, end, resolution, soc_at_start, soc_targets
        )
    else:
        raise ValueError(
            "Scheduling is not supported for asset type %s." % asset.asset_type
        )
    data_source = get_data_source(
        data_source_name="Seita",
        data_source_type="scheduling script",
    )
    click.echo("Job %s made schedule." % rq_job.id)
    # For consumption schedules, positive values denote consumption.
    # For the db, consumption is negative.
    ts_value_schedule = [
        Power(
            datetime=dt,
            horizon=dt.astimezone(pytz.utc) - belief_time.astimezone(pytz.utc),
            value=-value,
            asset_id=asset_id,
            data_source_id=data_source.id,
        )
        for dt, value in consumption_schedule.items()
    ]
    try:
        save_to_session(ts_value_schedule)
    except IntegrityError as e:
        current_app.logger.warning(e)
        click.echo("Rolling back due to IntegrityError")
        db.session.rollback()
        if current_app.config.get("FLEXMEASURES_MODE", "") == "play":
            click.echo("Saving again, with overwrite=True")
            save_to_session(ts_value_schedule, overwrite=True)
    db.session.commit()
    return True

def _set_task_results(md5):
    job = get_current_job()
    if job:
        task = Task.query.get(job.get_id())
        task.md5 = md5
        db.session.commit()

def run_task_predictions(ml_backend_id, batch_size=100):
    """
    Run prediction and update db, stats counts and project prerequisites
    :param project_id:
    :param batch_size:
    :return:
    """
    ml_backend = MLBackend.objects.get(id=ml_backend_id)
    response = ml_backend.setup()
    if response.is_error:
        raise ValueError(response.error_message)
    else:
        if response.response['model_version'] != ml_backend.model_version:
            ml_backend.model_version = response.response['model_version']
            ml_backend.save()
    # collect tasks without predictions for current model version
    tasks_without_predictions = ml_backend.project.tasks.annotate(
        model_version=F('predictions__model_version'),
        num_predictions=Count('predictions')
    ).filter(~Q(model_version=ml_backend.model_version) | Q(num_predictions=0))
    if not tasks_without_predictions.exists():
        logger.info(
            f'Predictions for project {ml_backend.project} with version {ml_backend.model_version} already exist, '
            f'update is not needed')
        return {'status': 'ok'}
    else:
        logger.info(
            f'Found {tasks_without_predictions.count()} tasks without predictions '
            f'from model version {ml_backend.model_version} in project {ml_backend.project}'
        )
    # TODO: randomize tasks selection so that taken tasks don't clash with each other with high probability
    tasks = TaskSerializer(tasks_without_predictions[:batch_size], many=True).data
    failed_tasks = []
    for task in tasks:
        task_id = task['id']
        ml_api_result = ml_backend.api.make_predictions(
            [task], ml_backend.model_version, ml_backend.project)
        if not _validate_ml_api_result(ml_api_result, [task], logger):
            logger.warning(f'Project {ml_backend.project}: task {task_id} failed')
            failed_tasks.append(task)
            continue
        prediction_result = ml_api_result.response['results'][0]
        with transaction.atomic():
            Prediction.objects.filter(
                task_id=task_id, model_version=ml_backend.model_version).delete()
            Prediction.objects.create(
                task_id=task_id,
                model_version=ml_backend.model_version,
                result=prediction_result['result'],
                score=safe_float(prediction_result.get('score', 0)),
                cluster=prediction_result.get('cluster'),
                neighbors=prediction_result.get('neighbors'),
                mislabeling=safe_float(prediction_result.get('mislabeling', 0)))
        logger.info(
            f'Project {ml_backend.project}: task {task_id} processed with model version {ml_backend.model_version}'
        )
    MLBackendPredictionJob.objects.filter(job_id=get_current_job().id).delete()
    logger.info(f'Total task processes: {len(tasks)}, failed: {len(failed_tasks)}')
    return {
        'status': 'ok',
        'processed_num': len(tasks),
        'failed': failed_tasks
    }

def worker_function(event_type, assignment_id, participant_id, node_id=None, details=None):
    """Process the notification."""
    _config()
    q = _get_queue()
    try:
        db.logger.debug("rq: worker_function working on job id: %s", get_current_job().id)
        db.logger.debug("rq: Received Queue Length: %d (%s)", len(q), ", ".join(q.job_ids))
    except AttributeError:
        db.logger.debug("Debug worker_function called synchronously")
    exp = _loaded_experiment(db.session)
    key = "-----"
    exp.log(
        "Received an {} notification for assignment {}, participant {}".format(
            event_type, assignment_id, participant_id),
        key,
    )
    if event_type == "TrackingEvent":
        node = None
        if node_id:
            node = models.Node.query.get(node_id)
        if not node:
            participant = None
            if participant_id:
                # Lookup assignment_id to create notifications
                participant = models.Participant.query.get(participant_id)
            elif assignment_id:
                participants = models.Participant.query.filter_by(
                    assignment_id=assignment_id).all()
                # if there are one or more participants select the most recent
                if participants:
                    participant = max(participants, key=attrgetter("creation_time"))
                    participant_id = participant.id
            if not participant:
                exp.log(
                    "Warning: No participant associated with this "
                    "TrackingEvent notification.",
                    key,
                )
                return
            nodes = participant.nodes()
            if not nodes:
                exp.log(
                    "Warning: No node associated with this "
                    "TrackingEvent notification.",
                    key,
                )
                return
            node = max(nodes, key=attrgetter("creation_time"))
        if not details:
            details = {}
        info = information.TrackingEvent(origin=node, details=details)
        db.session.add(info)
        db.session.commit()
        return
    runner_cls = WorkerEvent.for_name(event_type)
    if not runner_cls:
        exp.log("Event type {} is not supported... ignoring.".format(event_type))
        return
    if assignment_id is not None:
        # save the notification to the notification table
        notif = models.Notification(assignment_id=assignment_id, event_type=event_type)
        db.session.add(notif)
        db.session.commit()
        # try to identify the participant
        participants = models.Participant.query.filter_by(
            assignment_id=assignment_id).all()
        # if there are one or more participants select the most recent
        if participants:
            participant = max(participants, key=attrgetter("creation_time"))
        # if there are none print an error
        else:
            exp.log(
                "Warning: No participants associated with this "
                "assignment_id. Notification will not be processed.",
                key,
            )
            return None
    elif participant_id is not None:
        participant = models.Participant.query.filter_by(id=participant_id).all()[0]
    else:
        raise ValueError(
            "Error: worker_function needs either an assignment_id or a "
            "participant_id, they cannot both be None")
    participant_id = participant.id
    runner = runner_cls(participant, assignment_id, exp, db.session, _config(), datetime.now())
    runner()
    db.session.commit()

def set_task_complete():
    job = get_current_job()
    if job:
        task = Task.query.get(job.get_id())
        task.complete = True
        db.session.commit()

def build(req: dict):
    """Build image request and setup ImageBuilders automatically

    The `req` dict contains properties of the requested image.

    Args:
        req (dict): Contains all properties of the requested image
    """
    if not req["store_path"].is_dir():
        raise StorePathMissingError()

    job = get_current_job()

    log.debug(f"Building {req}")
    cache = (Path.cwd() / "cache" / req["version"] / req["target"]).parent
    target, subtarget = req["target"].split("/")
    sums_file = Path(cache / f"{subtarget}_sums")
    sig_file = Path(cache / f"{subtarget}_sums.sig")

    def setup_ib():
        """Setup ImageBuilder based on `req`

        This function downloads and verifies the ImageBuilder archive. Existing
        setups are automatically updated if newer versions are available upstream.
        """
        log.debug("Setting up ImageBuilder")
        if (cache / subtarget).is_dir():
            rmtree(cache / subtarget)

        download_file("sha256sums.sig", sig_file)
        download_file("sha256sums", sums_file)

        if not verify_usign(sig_file, sums_file, req["branch_data"]["pubkey"]):
            raise BadSignatureError()

        ib_search = re.search(
            r"^(.{64}) \*(openwrt-imagebuilder-.+?\.Linux-x86_64\.tar\.xz)$",
            sums_file.read_text(),
            re.MULTILINE,
        )

        if not ib_search:
            raise ChecksumMissingError()

        ib_hash, ib_archive = ib_search.groups()

        download_file(ib_archive)

        if ib_hash != get_file_hash(cache / ib_archive):
            raise BadChecksumError()

        (cache / subtarget).mkdir(parents=True, exist_ok=True)
        extract_archive = subprocess.run(
            ["tar", "--strip-components=1", "-xf", ib_archive, "-C", subtarget],
            cwd=cache,
        )

        if extract_archive.returncode:
            raise ExtractArchiveError()

        log.debug(f"Extracted TAR {ib_archive}")

        (cache / ib_archive).unlink()

        for key in req["branch_data"].get("extra_keys", []):
            fingerprint = fingerprint_pubkey_usign(key)
            (cache / subtarget / "keys" / fingerprint).write_text(
                f"untrusted comment: ASU extra key {fingerprint}\n{key}"
            )

        repos_path = cache / subtarget / "repositories.conf"
        repos = repos_path.read_text()

        # speed up downloads with HTTP and CDN
        repos = repos.replace("https://downloads.openwrt.org", req["upstream_url"])
        repos = repos.replace("http://downloads.openwrt.org", req["upstream_url"])
        repos = repos.replace("https", "http")

        extra_repos = req["branch_data"].get("extra_repos")
        if extra_repos:
            log.debug("Found extra repos")
            for name, repo in extra_repos.items():
                repos += f"\nsrc/gz {name} {repo}"

        repos_path.write_text(repos)
        log.debug(f"Repos:\n{repos}")

        if (Path.cwd() / "seckey").exists():
            # link key-build to imagebuilder
            (cache / subtarget / "key-build").symlink_to(Path.cwd() / "seckey")
        if (Path.cwd() / "pubkey").exists():
            # link key-build.pub to imagebuilder
            (cache / subtarget / "key-build.pub").symlink_to(Path.cwd() / "pubkey")
        if (Path.cwd() / "newcert").exists():
            # link key-build.ucert to imagebuilder
            (cache / subtarget / "key-build.ucert").symlink_to(Path.cwd() / "newcert")

    def download_file(filename: str, dest: str = None):
        """Download file from upstream target path

        The URL points automatically to the targets folder upstream

        Args:
            filename (str): File in upstream target folder
            dest (str): Optional path to store the file, defaults to the target cache folder
        """
        log.debug(f"Downloading {filename}")
        urllib.request.urlretrieve(
            req["upstream_url"]
            + "/"
            + req["branch_data"]["path"].format(version=req["version"])
            + "/targets/"
            + req["target"]
            + "/"
            + filename,
            dest or (cache / filename),
        )

    cache.mkdir(parents=True, exist_ok=True)

    stamp_file = cache / f"{subtarget}_stamp"

    sig_file_headers = urllib.request.urlopen(
        req["upstream_url"]
        + "/"
        + req["branch_data"]["path"].format(version=req["version"])
        + "/targets/"
        + req["target"]
        + "/sha256sums.sig"
    ).info()
    log.debug(f"sig_file_headers: \n{sig_file_headers}")
    origin_modified = sig_file_headers.get("Last-Modified")
    log.info("Origin %s", origin_modified)

    if stamp_file.is_file():
        local_modified = stamp_file.read_text()
        log.info("Local %s", local_modified)
    else:
        local_modified = ""

    if origin_modified != local_modified:
        log.debug("New ImageBuilder upstream available")
        setup_ib()
        stamp_file.write_text(origin_modified)

    info_run = subprocess.run(
        ["make", "info"], text=True, capture_output=True, cwd=cache / subtarget
    )

    version_code = re.search('Current Revision: "(r.+)"', info_run.stdout).group(1)

    if "version_code" in req:
        if version_code != req.get("version_code"):
            raise ImageBuilderVersionError(
                f"requested {req['version_code']} vs got {version_code}"
            )

    if req.get("diff_packages", False):
        default_packages = set(
            re.search(r"Default Packages: (.*)\n", info_run.stdout).group(1).split()
        )
        profile_packages = set(
            re.search(
                r"{}:\n .+\n Packages: (.*?)\n".format(req["profile"]),
                info_run.stdout,
                re.MULTILINE,
            )
            .group(1)
            .split()
        )
        remove_packages = (default_packages | profile_packages) - req["packages"]
        req["packages"] = req["packages"] | set(map(lambda p: f"-{p}", remove_packages))

    manifest_run = subprocess.run(
        [
            "make",
            "manifest",
            f"PROFILE={req['profile']}",
            f"PACKAGES={' '.join(req.get('packages', ''))}",
            "STRIP_ABI=1",
        ],
        text=True,
        cwd=cache / subtarget,
        capture_output=True,
    )

    if manifest_run.returncode:
        if "Package size mismatch" in manifest_run.stderr:
            rmtree(cache / subtarget)
            return build(req)
        else:
            job.meta["stdout"] = manifest_run.stdout
            job.meta["stderr"] = manifest_run.stderr
            job.save_meta()
            raise PackageSelectionError()

    manifest = dict(map(lambda pv: pv.split(" - "), manifest_run.stdout.splitlines()))

    for package, version in req.get("packages_versions", {}).items():
        if package not in manifest:
            raise PackageSelectionError(f"{package} not in manifest")
        if version != manifest[package]:
            raise PackageSelectionError(
                f"{package} version not as requested: {version} vs. {manifest[package]}"
            )

    manifest_packages = manifest.keys()
    log.debug(f"Manifest Packages: {manifest_packages}")

    packages_hash = get_packages_hash(manifest_packages)
    log.debug(f"Packages Hash {packages_hash}")

    bin_dir = Path(req["version"]) / req["target"] / req["profile"] / packages_hash

    (req["store_path"] / bin_dir).mkdir(parents=True, exist_ok=True)

    image_build = subprocess.run(
        [
            "make",
            "image",
            f"PROFILE={req['profile']}",
            f"PACKAGES={' '.join(req['packages'])}",
            f"EXTRA_IMAGE_NAME={packages_hash}",
            f"BIN_DIR={req['store_path'] / bin_dir}",
        ],
        text=True,
        cwd=cache / subtarget,
        capture_output=True,
    )

    # check if running as job or within pytest
    if job:
        job.meta["stdout"] = image_build.stdout
        job.meta["stderr"] = image_build.stderr
        job.meta["bin_dir"] = str(bin_dir)
        job.save_meta()

    if image_build.returncode:
        raise ImageBuildError()

    json_file = Path(req["store_path"] / bin_dir / "profiles.json")

    if not json_file.is_file():
        raise JSONMissingError()

    json_content = json.loads(json_file.read_text())

    if req["profile"] not in json_content["profiles"]:
        raise JSONMissingProfileError()

    json_content.update({"manifest": manifest})
    json_content.update(json_content["profiles"][req["profile"]])
    json_content["id"] = req["profile"]
    json_content.pop("profiles")

    return json_content
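Because build() mirrors its make output into job.meta via save_meta(), a caller can poll progress without waiting for the return value. A minimal sketch of the enqueue/poll side, assuming a default Redis connection and that `req` has been validated elsewhere (function names here are illustrative):

from redis import Redis
from rq import Queue

def enqueue_build(req: dict):
    queue = Queue(connection=Redis())
    # build() is the worker function defined above
    return queue.enqueue(build, req, job_timeout="10m")

def poll_build(job):
    job.refresh()  # re-read meta written by the worker via job.save_meta()
    return {
        "status": job.get_status(),
        "stdout": job.meta.get("stdout"),
        "stderr": job.meta.get("stderr"),
        "bin_dir": job.meta.get("bin_dir"),
    }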
def _dextr_thread(db_data, frame, points):
    job = rq.get_current_job()
    job.meta["result"] = __DEXTR_HANDLER.handle(db_data, frame, points)
    job.save_meta()
def modify_self(meta):
    j = get_current_job()
    j.meta.update(meta)
    j.save()
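A sketch of how a caller could observe the meta written by modify_self, assuming a running worker and a default Redis connection (purely illustrative):

from redis import Redis
from rq import Queue

queue = Queue(connection=Redis())
job = queue.enqueue(modify_self, {"stage": "started"})
# ... later, after the worker has picked the job up:
job.refresh()                 # reload job data, including meta, from Redis
print(job.meta.get("stage"))  # -> "started" once the job has run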
def add_job_meta(self):
    job = get_current_job()
    job.meta["conn"] = self  # for identification during timeout handling
    job.save_meta()
def access_self():
    assert get_current_connection() is not None
    assert get_current_job() is not None
def refresh_background(course_id):
    """
    Look up existing extensions and apply them to new quizzes.

    :param course_id: The Canvas ID of the Course.
    :type course_id: int
    :rtype: dict
    :returns: A dictionary containing two parts:

        - success `bool` False if there was an error, True otherwise.
        - message `str` A long description of success or failure.
    """
    job = get_current_job()

    update_job(job, 0, "Starting...", "started")

    with app.app_context():
        course, created = get_or_create(db.session, Course, canvas_id=course_id)

        try:
            course_name = get_course(course_id).get("name", "<UNNAMED COURSE>")
            course.course_name = course_name
            db.session.commit()
        except requests.exceptions.HTTPError:
            update_job(job, 0, "Course not found.", "failed", error=True)
            logger.exception("Unable to find course #{}".format(course_id))
            return job.meta

        quizzes = missing_and_stale_quizzes(course_id)
        num_quizzes = len(quizzes)

        if num_quizzes < 1:
            update_job(
                job,
                100,
                "Complete. No quizzes required updates.",
                "complete",
                error=False,
            )
            return job.meta

        percent_user_map = defaultdict(list)
        inactive_list = []

        update_job(job, 0, "Getting past extensions.", "processing", False)
        for extension in course.extensions:
            # If extension is inactive, ignore.
            if not extension.active:
                inactive_list.append(extension.user.sortable_name)
                logger.debug("Extension #{} is inactive.".format(extension.id))
                continue

            user_canvas_id = (
                User.query.filter_by(id=extension.user_id).first().canvas_id
            )

            # Check if user is in course. If not, deactivate extension.
            try:
                canvas_user = get_user(course_id, user_canvas_id)

                # Skip user if not a student. Fixes an edge case where a
                # student that previously received an extension changes roles.
                enrolls = canvas_user.get("enrollments", [])
                type_list = [
                    e["type"]
                    for e in enrolls
                    if e["enrollment_state"] in ("active", "invited")
                ]
                if not any(t == "StudentEnrollment" for t in type_list):
                    logger.info(
                        ("User #{} was found in course #{}, but is not an "
                         "active student. Deactivating extension #{}. Roles "
                         "found: {}").format(
                            user_canvas_id,
                            course_id,
                            extension.id,
                            ", ".join(type_list) if len(enrolls) > 0 else None,
                        ))
                    extension.active = False
                    db.session.commit()
                    inactive_list.append(extension.user.sortable_name)
                    continue

            except requests.exceptions.HTTPError:
                log_str = "User #{} not in course #{}. Deactivating extension #{}."
                logger.info(
                    log_str.format(user_canvas_id, course_id, extension.id))
                extension.active = False
                db.session.commit()
                inactive_list.append(extension.user.sortable_name)
                continue

            percent_user_map[extension.percent].append(user_canvas_id)

        if len(percent_user_map) < 1:
            msg_str = "No active extensions were found.<br>"

            if len(inactive_list) > 0:
                msg_str += " Extensions for the following students are inactive:<br>{}"
                msg_str = msg_str.format("<br>".join(inactive_list))

            update_job(job, 100, msg_str, "complete", error=False)
            return job.meta

        for index, quiz in enumerate(quizzes):
            quiz_id = quiz.get("id", None)
            quiz_title = quiz.get("title", "[UNTITLED QUIZ]")

            comp_perc = int(((float(index)) / float(num_quizzes)) * 100)
            refreshing_str = "Refreshing quiz #{} - {} [{} of {}]"
            update_job(
                job,
                comp_perc,
                refreshing_str.format(quiz_id, quiz_title, index + 1, num_quizzes),
                "processing",
                error=False,
            )

            for percent, user_list in percent_user_map.items():
                extension_response = extend_quiz(course_id, quiz, percent, user_list)

                if extension_response.get("success", False) is True:
                    # add/update quiz
                    quiz_obj, created = get_or_create(db.session,
                                                      Quiz,
                                                      canvas_id=quiz_id,
                                                      course_id=course.id)
                    quiz_obj.title = quiz_title
                    quiz_obj.time_limit = quiz.get("time_limit")
                    db.session.commit()
                else:
                    error_message = "Some quizzes couldn't be updated. "
                    error_message += extension_response.get("message", "")
                    update_job(job, comp_perc, error_message, "failed", error=True)
                    return job.meta

        msg = "{} quizzes have been updated.".format(len(quizzes))
        update_job(job, 100, msg, "complete", error=False)
        return job.meta
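The function above leans on an update_job helper that mirrors progress into job.meta so a status endpoint can report it. A minimal sketch of what such a helper and a status lookup could look like; the meta field names are assumptions, not the project's actual schema:

def update_job(job, percent, status_msg, status, error=False):
    # Write a small progress record into the job's meta and persist it.
    job.meta = {
        "percent": percent,
        "status_msg": status_msg,
        "status": status,
        "error": error,
    }
    job.save_meta()

def job_status(job_id, queue):
    # Fetch the job from its queue and return the latest progress record.
    job = queue.fetch_job(job_id)
    if job is None:
        return {"error": True, "status_msg": "Unknown job id."}
    job.refresh()
    return job.meta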
def get_job_object():
    return get_current_job()
def invoke_iap_export(timestamp_id, output_path, username, shared_folder_map, task_key, analysis_iap_id=None):
    """
    This method represents an RQ job workload. It should be enqueued into the RQ analysis queue
    and processed by a corresponding worker.

    Handles the invocation of data export of an IAP analysis on the IAP server and fetches the
    resulting information afterwards. The received information is then entered into the database
    accordingly.

    The analysis_iap_id has to be either passed directly or has to be stored in the result of a
    job that this one depends on. The key under which the previous job must store it is
    'response.result_id'.

    :param timestamp_id: The ID of the :class:`~server.models.timestamp_model.TimestampModel`
        instance to which the data belongs
    :param output_path: The path, as SMB URL, where the data should be exported to
    :param username: The username of the user invoking this job
    :param task_key: The ID of the
        :class:`~server.modules.processing.analysis.analysis_task.AnalysisTask` to which this job belongs
    :param shared_folder_map: A dict containing a mapping between SMB URLs and local paths
        representing the corresponding mount points
    :param analysis_iap_id: The IAP ID of the analysis on the IAP server

    :return: a dict containing the 'analysis_id' for which the data has been exported and the
        'path' to which the results have been exported. (All nested inside the 'response' key)
    """
    print('EXECUTE EXPORT')
    job = get_current_job()
    log_store = get_log_store()
    task = AnalysisTask.from_key(get_redis_connection(), task_key)
    channel = get_grpc_channel()
    iap_stub = phenopipe_iap_pb2_grpc.PhenopipeIapStub(channel)
    pipe_stub = phenopipe_pb2_grpc.PhenopipeStub(channel)
    if analysis_iap_id is None:
        analysis_iap_id = job.dependency.result['response']['result_id']
    log_store.put(job.id, 'Started Export Job', 0)
    task.update_message('Started Export Job')
    try:
        response = iap_stub.ExportExperiment(
            phenopipe_iap_pb2.ExportRequest(experiment_id=analysis_iap_id, destination_path=output_path)
        )
        remote_job_id = response.job_id
        request = phenopipe_pb2.WatchJobRequest(
            job_id=remote_job_id
        )
        status = pipe_stub.WatchJob(request)
        for msg in status:
            print(msg.message.decode('string-escape'))
            log_store.put(job.id, msg.message.decode('string-escape'), msg.progress)
        response = iap_stub.FetchExportResult(
            phenopipe_pb2.FetchJobResultRequest(job_id=remote_job_id)
        )
        session = get_session()
        analysis = session.query(AnalysisModel) \
            .filter(AnalysisModel.timestamp_id == timestamp_id) \
            .filter(AnalysisModel.iap_id == analysis_iap_id) \
            .one()
        log_store.put(job.id, 'Received Results. Started to parse and add information', 90)
        task.update_message('Received Results. Started to parse and add information')
        image_path = get_local_path_from_smb(response.image_path, shared_folder_map)
        # TODO handle DB errors
        for image_name in os.listdir(image_path):
            # Extract information from filename
            snapshot_id, _, new_filename = image_name.partition('_')
            _, _, angle = os.path.splitext(image_name)[0].rpartition('_')
            img = ImageModel(snapshot_id, response.image_path, new_filename, angle, 'segmented')
            session.add(img)
            # rename file and remove the snapshot id
            os.rename(os.path.join(image_path, image_name), os.path.join(image_path, new_filename))
        analysis.export_path = response.path
        exported_at = datetime.utcnow()
        analysis.exported_at = exported_at
        session.commit()
        log_store.put(job.id, 'Finished Export Job', 100)
        task.update_message('Finished Export Job')
        return create_return_object(JobType.iap_export, timestamp_id,
                                    {'analysis_id': analysis.id, 'path': response.path,
                                     'exported_at': exported_at})
    except grpc.RpcError as e:
        log_store.put(job.id, e.details(), 0)
        task.update_message('Export Job Failed')
        raise
def xloader_data_into_datastore_(input, job_dict):
    '''This function:
    * downloads the resource (metadata) from CKAN
    * downloads the data
    * calls the loader to load the data into DataStore
    * calls back to CKAN with the new status

    (datapusher called this function 'push_to_datastore')
    '''
    job_id = get_current_job().id
    db.init(config)

    # Store details of the job in the db
    try:
        db.add_pending_job(job_id, **input)
    except sa.exc.IntegrityError:
        raise JobError('job_id {} already exists'.format(job_id))

    # Set-up logging to the db
    handler = StoringHandler(job_id, input)
    level = logging.DEBUG
    handler.setLevel(level)
    logger = logging.getLogger(job_id)
    handler.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(handler)
    # also show logs on stderr
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.DEBUG)

    validate_input(input)

    data = input['metadata']

    ckan_url = data['ckan_url']
    resource_id = data['resource_id']
    api_key = input.get('api_key')

    try:
        resource, dataset = get_resource_and_dataset(resource_id)
    except (JobError, ObjectNotFound) as e:
        # try again in 5 seconds just in case CKAN is slow at adding resource
        time.sleep(5)
        resource, dataset = get_resource_and_dataset(resource_id)
    resource_ckan_url = '/dataset/{}/resource/{}' \
        .format(dataset['name'], resource['id'])
    logger.info('Express Load starting: {}'.format(resource_ckan_url))

    # check if the resource url_type is a datastore
    if resource.get('url_type') == 'datastore':
        logger.info('Ignoring resource - url_type=datastore - dump files are '
                    'managed with the Datastore API')
        return

    # download resource
    tmp_file, file_hash = _download_resource_data(resource, data, api_key,
                                                  logger)

    # hash isn't actually stored, so this is a bit worthless at the moment
    if (resource.get('hash') == file_hash
            and not data.get('ignore_hash')):
        logger.info('Ignoring resource - the file hash hasn\'t changed: '
                    '{hash}.'.format(hash=file_hash))
        return
    logger.info('File hash: {}'.format(file_hash))
    resource['hash'] = file_hash  # TODO write this back to the actual resource

    def direct_load():
        fields = loader.load_csv(
            tmp_file.name,
            resource_id=resource['id'],
            mimetype=resource.get('format'),
            logger=logger)
        loader.calculate_record_count(
            resource_id=resource['id'], logger=logger)
        set_datastore_active(data, resource, api_key, ckan_url, logger)
        job_dict['status'] = 'running_but_viewable'
        callback_xloader_hook(result_url=input['result_url'],
                              api_key=api_key,
                              job_dict=job_dict)
        logger.info('Data now available to users: {}'.format(resource_ckan_url))
        loader.create_column_indexes(
            fields=fields,
            resource_id=resource['id'],
            logger=logger)

    def messytables_load():
        try:
            loader.load_table(tmp_file.name,
                              resource_id=resource['id'],
                              mimetype=resource.get('format'),
                              logger=logger)
        except JobError as e:
            logger.error('Error during messytables load: {}'.format(e))
            raise
        loader.calculate_record_count(
            resource_id=resource['id'], logger=logger)
        set_datastore_active(data, resource, api_key, ckan_url, logger)
        logger.info('Finished loading with messytables')

    # Load it
    logger.info('Loading CSV')
    just_load_with_messytables = asbool(config.get(
        'ckanext.xloader.just_load_with_messytables', False))
    logger.info("'Just load with messytables' mode is: {}".format(
        just_load_with_messytables))
    try:
        if just_load_with_messytables:
            messytables_load()
        else:
            try:
                direct_load()
            except JobError as e:
                logger.warning('Load using COPY failed: {}'.format(e))
                logger.info('Trying again with messytables')
                messytables_load()
    except FileCouldNotBeLoadedError as e:
        logger.warning('Loading excerpt for this format not supported.')
        logger.error('Loading file raised an error: {}'.format(e))
        raise JobError('Loading file raised an error: {}'.format(e))
    tmp_file.close()

    logger.info('Express Load completed')
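The worker above keys both its pending-job record and its logger on the RQ job id, so the caller only needs to keep that id around. A hedged sketch of the enqueue side; the queue name, timeout, and job_dict shape are illustrative, only the function and its (input, job_dict) signature come from the snippet above:

from redis import Redis
from rq import Queue

def enqueue_xloader(input_dict):
    queue = Queue('ckan-xloader', connection=Redis())  # queue name is an assumption
    job_dict = {'status': 'pending', 'metadata': input_dict['metadata']}
    job = queue.enqueue(xloader_data_into_datastore_, input_dict, job_dict,
                        job_timeout=3600)
    return job.id  # later used to look up the pending-job row and its stored logs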
def invoke_iap_import(timestamp_id, experiment_name, coordinator, scientist, local_path, path, username, task_key):
    """
    This method represents an RQ job workload. It should be enqueued into the RQ analysis queue
    and processed by a corresponding worker.

    Handles the invocation of data import into IAP on the IAP server and fetches the result
    information afterwards. The received information is then entered into the database accordingly.

    :param timestamp_id: The ID of the :class:`~server.models.timestamp_model.TimestampModel`
        instance which should be imported
    :param experiment_name: The name of the experiment to import
    :param coordinator: The name of the experiment coordinator
    :param scientist: The name of the scientist carrying out the experiment
    :param local_path: The path to the data on the local system
    :param path: The SMB URL representing the location of the data
    :param username: The username of the user invoking this job
    :param task_key: The redis key of the
        :class:`~server.modules.analysis.analysis_task.AnalysisTask` to which this job belongs

    :return: A dict containing the 'experiment_id' (nested in the 'response' key) returned by IAP
    """
    print('EXECUTE IMPORT')
    job = get_current_job()
    log_store = get_log_store()
    task = AnalysisTask.from_key(get_redis_connection(), task_key)
    channel = get_grpc_channel()
    iap_stub = phenopipe_iap_pb2_grpc.PhenopipeIapStub(channel)
    pipe_stub = phenopipe_pb2_grpc.PhenopipeStub(channel)
    log_store.put(job.id, 'Started Import Job', 0)
    task.update_message('Started Import Job')
    log_store.put(job.id, 'Create Metadata File')
    task.update_message('Create Metadata File')
    create_iap_import_sheet(timestamp_id, local_path)
    log_store.put(job.id, 'Metadata File Created')
    task.update_message('Metadata File Created')
    try:
        log_store.put(job.id, 'Import data into IAP')
        task.update_message('Import data into IAP')
        response = iap_stub.ImportExperiment(
            phenopipe_iap_pb2.ImportRequest(path=path, experiment_name=experiment_name,
                                            coordinator_name=coordinator, user_name=scientist)
        )
        remote_job_id = response.job_id
        request = phenopipe_pb2.WatchJobRequest(
            job_id=remote_job_id
        )
        status = pipe_stub.WatchJob(request)
        for msg in status:
            log_store.put(job.id, msg.message.decode('string-escape'), msg.progress)
        response = iap_stub.FetchImportResult(
            phenopipe_pb2.FetchJobResultRequest(job_id=remote_job_id)
        )
        session = get_session()
        timestamp = session.query(TimestampModel).get(timestamp_id)
        timestamp.iap_exp_id = response.experiment_id
        session.commit()
        log_store.put(job.id, 'Finished Import Job', 100)
        task.update_message('Finished Import Job')
        return create_return_object(JobType.iap_import, timestamp_id,
                                    {'experiment_iap_id': response.experiment_id})
    except grpc.RpcError as e:
        if e.code() == grpc.StatusCode.ALREADY_EXISTS:
            session = get_session()
            timestamp = session.query(TimestampModel).get(timestamp_id)
            timestamp.iap_exp_id = e.initial_metadata()[0][1]
            session.commit()
            return create_return_object(JobType.iap_import, timestamp_id,
                                        {'experiment_iap_id': timestamp.iap_exp_id})
        else:
            task.update_message('Import Job Failed')
            log_store.put(job.id, e.details(), 0)
            raise
def task_make_network(form_data):
    job = get_current_job()
    job.meta['progress'] = 'started'
    job.save_meta()

    network = Network(include_experimental=bool(form_data['include_experimental']),
                      include_two_step=bool(form_data['include_two_step']),
                      include_requires_absence_of_water=bool(
                          form_data['include_requires_absence_of_water']),
                      print_log=not current_app.config['PRODUCTION'])

    network.update_settings({
        "allow_backwards_steps": bool(form_data['allow_backwards']),
        "remove_simple": bool(form_data['remove_small']),
        "similarity_score_threshold": float(form_data['sub_thres']),
        "combine_enantiomers": bool(form_data['combine_enantiomers']),
        "num_enzymes": 1,
        "calculate_complexities": bool(form_data['calc_complexity']),
        "calculate_substrate_specificity": bool(form_data['sub_sim']),
        "max_nodes": int(form_data['max_initial_nodes']),
        "colour_reactions": form_data['colour_reactions'],
        "colour_arrows": form_data['colour_edges'],
        "show_negative_enzymes": form_data['show_neg_enz'],
        "only_postitive_enzyme_data": not form_data['show_neg_enz'],
        "max_reactions": form_data["max_reactions"],
        'only_reviewed_activity_data': bool(form_data["only_reviewed"])
    })

    if form_data["specificity_scoring_mode"] == 'Product + substrates (slower)':
        network.update_settings({'specificity_score_substrates': True})

    #print(f"include_experimental = {network.settings['include_experimental']}")
    #print(f"include_two_step = {network.settings['include_two_step']}")

    network.generate(form_data['target_smiles'],
                     form_data['number_steps'],
                     calculate_scores=False)

    job.meta['progress'] = 'network_generated'
    job.save_meta()

    network.calculate_scores()

    job.meta['progress'] = 'scores_calculated'
    job.save_meta()

    nodes, edges = network.get_visjs_nodes_and_edges()

    #options = {'interaction': {'multiselect': 'true',}}
    options = {}
    default_network_name = 'Network for ' + str(network.target_smiles)

    result = {'save_id': str(uuid.uuid4()),
              'save_links': [],
              'save_name': default_network_name,
              'nodes': nodes,
              'edges': edges,
              'options': json.dumps(options),
              'graph_dict': json.dumps(nx.to_dict_of_lists(network.graph)),
              'target_smiles': str(network.target_smiles),
              'network_options': json.dumps(network.settings),
              'attr_dict': json.dumps(network.attributes_dict()),
              'max_reactions': int(network.settings['max_reactions'])}

    current_app.redis.mset({job.id: json.dumps(result)})
    time_to_expire = 15 * 60  # 15 mins * 60 seconds
    current_app.redis.expire(job.id, time_to_expire)

    return result
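Since the worker stores the serialized result under the RQ job id with a 15-minute TTL, the front end can fetch it straight from Redis. A sketch of such a retrieval view, assuming the same current_app.redis connection; the view name and response shape are illustrative:

import json
from flask import current_app, jsonify

def get_network_result(job_id):
    raw = current_app.redis.get(job_id)
    if raw is None:
        # either the job has not finished yet or the 15-minute TTL has expired
        return jsonify({'status': 'pending or expired'}), 404
    return jsonify(json.loads(raw))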
def _create_thread(tid, data):
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    manifest_file = []
    media = _count_files(data, manifest_file)
    media, task_mode = _validate_data(media, manifest_file)
    if manifest_file:
        assert settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        if db_data.storage == StorageChoice.LOCAL:
            _copy_data_from_share(data['server_files'], upload_dir)
        else:
            upload_dir = settings.SHARE_ROOT

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            source_paths = [os.path.join(upload_dir, f) for f in media_files]
            if media_type in {'archive', 'zip'} and db_data.storage == StorageChoice.SHARE:
                source_paths.append(db_data.get_upload_dirname())
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = StorageChoice.LOCAL
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=source_paths,
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )

    validate_dimension = ValidateDimension()
    if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
        extractor.extract()
        validate_dimension.set_path(
            os.path.split(extractor.get_zip_filename())[0])
        validate_dimension.validate()
        if validate_dimension.dimension == DimensionType.DIM_3D:
            db_task.dimension = DimensionType.DIM_3D

            extractor.reconcile(
                source_files=list(validate_dimension.related_files.keys()),
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
                dimension=DimensionType.DIM_3D,
            )
            extractor.add_files(validate_dimension.converted_files)

    related_images = {}
    if isinstance(extractor, MEDIA_TYPES['image']['extractor']):
        extractor.filter(lambda x: not re.search(
            r'(^|{0})related_images{0}'.format(os.sep), x))
        related_images = detect_related_images(extractor.absolute_source_paths, upload_dir)

    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO \
        if task_mode == 'interpolation' and not data['use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO \
        if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(
                progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter + 1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter \
        if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for the 0-100 range) for the original chunks,
        # which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size with a slight
        # increase of quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            w, h = extractor.get_image_size(0)
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        job.meta['status'] = msg
        job.save_meta()

    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():
            if not media_files:
                continue

            # replace manifest file (e.g. 'subdir/manifest.jsonl' was uploaded)
            if manifest_file and not os.path.exists(db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
                                db_data.get_manifest_path())
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file[0]))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(
                                source_path=os.path.join(upload_dir, media_files[0]),
                                manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest['properties']['length']
                            video_size = manifest['properties']['resolution']
                            manifest_is_prepared = True
                        except Exception as ex:
                            if os.path.exists(db_data.get_index_path()):
                                os.remove(db_data.get_index_path())
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'Invalid manifest file was uploaded.'
                                slogger.glob.warning(str(ex))
                            _update_status(
                                '{} Start prepare a valid manifest file.'.format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Start prepare a manifest file')
                        manifest = VideoManifestManager(db_data.get_manifest_path())
                        meta_info = manifest.prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size)
                        manifest.create(meta_info)
                        manifest.init_index()
                        _update_status('A manifest had been created')

                        all_frames = meta_info.get_size()
                        video_size = meta_info.frame_sizes
                        manifest_is_prepared = True

                    db_data.size = len(range(db_data.start_frame,
                        min(data['stop_frame'] + 1 if data['stop_frame'] else all_frames, all_frames),
                        db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_manifest_path()):
                        os.remove(db_data.get_manifest_path())
                    if os.path.exists(db_data.get_index_path()):
                        os.remove(db_data.get_index_path())
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "Uploaded video does not support a quick way of task creating."
                    _update_status(
                        "{} The task will be created using the old method".format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)

                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    if db_task.dimension == DimensionType.DIM_2D:
                        meta_info = manifest.prepare_meta(
                            sources=extractor.absolute_source_paths,
                            meta={k: {'related_images': related_images[k]} for k in related_images},
                            data_dir=upload_dir)
                        content = meta_info.content
                    else:
                        content = []
                        for source in extractor.absolute_source_paths:
                            name, ext = os.path.splitext(os.path.relpath(source, upload_dir))
                            content.append({
                                'name': name,
                                'meta': {
                                    'related_images': related_images[''.join((name, ext))]
                                },
                                'extension': ext
                            })
                    manifest.create(content)
                    manifest.init_index()
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(
                        extractor.frame_range,
                        lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i) for i in chunk_frames]
                    img_sizes = []

                    for _, frame_id in chunk_paths:
                        properties = manifest[frame_id]
                        if db_task.dimension == DimensionType.DIM_2D:
                            resolution = (properties['width'], properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                                     path=os.path.relpath(path, upload_dir),
                                     frame=frame,
                                     width=w,
                                     height=h)
                        for (path, frame), (w, h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
        counter = itertools.count()
        generator = itertools.groupby(extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data, original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(data=db_data,
                                 path=os.path.relpath(data[1], upload_dir),
                                 frame=data[2],
                                 width=size[0],
                                 height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
        models.Image.objects.bulk_create(db_images)
        created_images = models.Image.objects.filter(data_id=db_data.id)

        db_related_files = [
            RelatedFile(data=image.data,
                        primary_image=image,
                        path=os.path.join(upload_dir, related_file_path))
            for image in created_images
            for related_file_path in related_images.get(image.path, [])
        ]
        RelatedFile.objects.bulk_create(db_related_files)
        db_images = []
    else:
        models.Video.objects.create(
            data=db_data,
            path=os.path.relpath(video_path, upload_dir),
            width=video_size[0],
            height=video_size[1])

    if db_data.stop_frame == 0:
        db_data.stop_frame = db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame,
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(db_data.size, db_data.id))
    _save_task_to_db(db_task)
def train_script_wrapper(cls, project, label_config, train_kwargs, initialization_params=None, tasks=()):

    if initialization_params:
        # Reinitialize new cls instance for using in RQ context
        initialization_params = initialization_params or {}
        cls.initialize(**initialization_params)

    # fetching the latest model version before we generate the next one
    t = time.time()
    m = cls.fetch(project, label_config)
    m.is_training = True

    version = cls._generate_version()

    if cls.model_dir:
        logger.debug('Running in model dir: ' + cls.model_dir)
        project_model_dir = os.path.join(cls.model_dir, project or '')
        workdir = os.path.join(project_model_dir, version)
        os.makedirs(workdir, exist_ok=True)
    else:
        logger.debug('Running without model dir')
        workdir = None

    if cls.without_redis():
        data_stream = tasks
    else:
        data_stream = (json.loads(t) for t in cls._redis.lrange(cls._get_tasks_key(project), 0, -1))

    if workdir:
        data_stream, snapshot = tee(data_stream)
        cls.create_data_snapshot(snapshot, workdir)

    try:
        train_output = m.model.fit(data_stream, workdir, **train_kwargs)
        if cls.without_redis():
            job_id = None
        else:
            job_id = get_current_job().id
        job_result = json.dumps({
            'status': 'ok',
            'train_output': train_output,
            'project': project,
            'workdir': workdir,
            'version': version,
            'job_id': job_id,
            'time': time.time() - t
        })
        if workdir:
            job_result_file = os.path.join(workdir, 'job_result.json')
            with open(job_result_file, mode='w') as fout:
                fout.write(job_result)
        if not cls.without_redis():
            cls._redis.rpush(cls._get_job_results_key(project), job_result)
    finally:
        m.is_training = False

    return job_result
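Training results are appended to a per-project Redis list, so the most recent one can be read back without touching the RQ job itself. A sketch of that retrieval, assuming the same cls with _redis and _get_job_results_key as above (illustrative only):

import json

def latest_train_result(cls, project):
    # lrange(-1, -1) returns at most the last element of the results list
    results = cls._redis.lrange(cls._get_job_results_key(project), -1, -1)
    if not results:
        return None
    return json.loads(results[0])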
def check_dependencies_are_met():
    return get_current_job().dependencies_are_met()
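A sketch of how this helper might be exercised: enqueue it behind another job with depends_on, so dependencies_are_met() reflects the parent's state when the worker runs it. The parent job here reuses modify_self from the earlier snippet purely as a stand-in; queue setup is illustrative:

from redis import Redis
from rq import Queue

queue = Queue(connection=Redis())
parent = queue.enqueue(modify_self, {"stage": "parent"})
child = queue.enqueue(check_dependencies_are_met, depends_on=parent)
# By the time the worker runs `child`, its dependency has finished,
# so the job's return value will be True.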
def save_key_ttl(key):
    # Stores key ttl in meta
    job = get_current_job()
    ttl = job.connection.ttl(key)
    job.meta = {'ttl': ttl}
    job.save_meta()
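A quick end-to-end sketch, assuming a running worker and a default Redis connection: set a key with a TTL, enqueue save_key_ttl for it, then read the recorded TTL back from job.meta (the key name is illustrative):

from redis import Redis
from rq import Queue

redis_conn = Redis()
redis_conn.set("example-key", "value", ex=120)  # illustrative key with a 120s TTL

queue = Queue(connection=redis_conn)
job = queue.enqueue(save_key_ttl, "example-key")
# ... after the worker has processed the job:
job.refresh()
print(job.meta["ttl"])  # roughly 120, minus the time elapsed before the job ran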