Example #1
def render_task(dburl, docpath, slug):
    """Render a document."""
    oldcwd = os.getcwd()
    try:
        os.chdir(os.path.join(docpath, slug))
    except OSError:  # slug directory does not exist
        db = StrictRedis.from_url(dburl)
        job = get_current_job(db)
        job.meta.update({'out': 'Document not found.', 'return': 127, 'status': False})
        return 127

    db = StrictRedis.from_url(dburl)
    job = get_current_job(db)
    job.meta.update({'out': '', 'milestone': 0, 'total': 1, 'return': None,
                     'status': None})
    job.save()

    p = subprocess.Popen(('lualatex', '--halt-on-error', slug + '.tex'),
                         stdout=subprocess.PIPE)

    out = []

    while p.poll() is None:
        nl = p.stdout.readline()
        out.append(nl)
        job.meta.update({'out': ''.join(out), 'return': None,
                         'status': None})
        job.save()

    out = ''.join(out)
    job.meta.update({'out': out, 'return': p.returncode,
                     'status': p.returncode == 0})
    job.save()
    os.chdir(oldcwd)
    return p.returncode
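A worker function like this is only half of the picture: the caller enqueues it with RQ and polls the job's meta for the streamed LaTeX log. A minimal sketch of that side, assuming a hypothetical queue name and arguments (not part of the original project):

from redis import Redis
from rq import Queue
from rq.job import Job

redis_conn = Redis()
q = Queue('render', connection=redis_conn)   # hypothetical queue name
job = q.enqueue(render_task, 'redis://localhost:6379/0',
                '/srv/documents', 'my-doc')  # hypothetical arguments

# Elsewhere, e.g. in a status endpoint:
status_job = Job.fetch(job.id, connection=redis_conn)
status_job.refresh()                         # re-read the meta written by the worker
print(status_job.meta.get('out', ''))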
Example #2
def setup_container(d_os):
    cur_job = get_current_job()
    cur_job.meta['ownername'] = str(d_os['username'])
    cur_job.meta['request_status'] = "Performing status check"
    cur_job.save()
    cur_job.refresh()
    if d_os['ct_type'] == "openvz":
        cmd = ("vzctl set " + d_os['cid'] + " --ipadd " + d_os['ipadd'] +
               " --hostname " + d_os['hname'] + " --nameserver " + d_os['nserver'] +
               " --userpasswd " + d_os['usr'] + ":" + d_os['pwd'] + " --save")
        print cmd
        out = check_output(shlex.split(cmd))
    elif d_os['ct_type'] == "aws_vm":
        # Create a file under /home/laks/tmp/tutorials named after the instance IP.
        fpathname = "/home/laks/tmp/tutorials/" + str(d_os['instance'].ip_address) + ".json"
        fd = open(fpathname, "w")
        fd.write(d_os['tutorial'])
        fd.close()

        spoty.install_sw(d_os['instance'], d_os['repo'])
        findreplace = [
            ("SUBDOMAIN", d_os['username']),
            ("IPADDRESS", d_os['instance'].ip_address)
        ]
        creat_nginx_tmpl(findreplace, d_os)
        reload_nginx()

    else:
        print "setting up subdomain for user " + str(d_os['username'])
        cmd = "docker inspect --format '{{ .NetworkSettings.IPAddress }}' " + str(d_os['imgid'])
        proc = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        ipaddr, err = proc.communicate()
        print ipaddr
        if d_os['code'] == 1:
            findreplace = [
                ("SUBDOMAIN", d_os['username'] + str(d_os['code'])),
                ("IPADDRESS", ipaddr)
            ]
        else:
            findreplace = [
                ("SUBDOMAIN", d_os['username']),
                ("IPADDRESS", ipaddr)
            ]
        creat_nginx_tmpl(findreplace, d_os)
        reload_nginx()
        time.sleep(2)
        setup_docker_ct_helper(d_os)

    cur_job = get_current_job()
    cur_job.meta['ownername'] = str(d_os['username'])
    cur_job.meta['request_status'] = "Running, please login"
    cur_job.save()
    cur_job.refresh()
    # next queue
    if d_os['proceed_nextq']:
        with Connection(Redis()):
            q = Queue('startq', default_timeout=15000)
            job = q.enqueue_call(func=start_container, args=(d_os,), result_ttl=600)
Example #3
 def __init__(self):
     self.job = get_current_job()
     self.timeout = 60 * 10
     self.start_date = time.time()
     while 'mongo_id' not in self.job.meta:
         time.sleep(0.1)      # avoid a tight busy-wait
         self.job.refresh()   # re-read meta written by the enqueuing process
     self.mongo_id = ObjectId(self.job.meta['mongo_id'])
     file_ = inspect.getfile(self.__class__)
     self.name = os.path.basename(file_).split('.')[0]
     self.result = "Ok"
     self._do_run = True
     self.run()
Example #4
def add_job(username, domain, project):

    _job = get_current_job()

    payload = {'project': project, 'username': username, 'spider': domain, 'jobid': _job.id}
    req = urllib2.urlopen(scrapyd_uri, data=urllib.urlencode(payload))
    if req.getcode() != 200:
        raise Exception('scrapyd returned HTTP %s' % req.getcode())

    while True:
        job = get_current_job()
        print 'job waiting. jobid: %s, meta: %s' % (job.id, job.meta)
        if 'status' in job.meta:
            return
        time.sleep(5)
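The loop above only exits once something outside the worker writes a 'status' key into the job's meta. A minimal sketch of such a setter, assuming a hypothetical callback handler (use job.save() instead of job.save_meta() on older RQ releases):

from redis import Redis
from rq.job import Job

def mark_spider_done(job_id, status='finished'):   # hypothetical helper
    job = Job.fetch(job_id, connection=Redis())
    job.meta['status'] = status
    job.save_meta()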
Example #5
 def _poolJobs(self, db_name, check=False):
     """Check if we are a worker process.
     """
     if not (get_current_connection() and get_current_job()):
         super(IrCron, self)._poolJobs(db_name, check)
Example #6
def build_pkg_handler():
    """


    :return:
    """
    status.idle = False
    packages = status.queue
    if len(packages) > 0:
        pack = status.queue.lpop()
        if pack:
            pkgobj = package.get_pkg_object(name=pack)
        else:
            return False

        rqjob = get_current_job(db)
        rqjob.meta['package'] = pkgobj.name
        rqjob.save()

        status.now_building = pkgobj.name

        if pkgobj.is_iso is True or pkgobj.is_iso == 'True':
            status.iso_building = True
            build_result = build_iso(pkgobj)
        else:
            build_result = build_pkgs(pkgobj)

        # TODO: Move this into its own method
        if build_result is not None:
            completed = status.completed
            failed = status.failed
            blds = pkgobj.builds
            total = len(blds)
            if total > 0:
                success = len([x for x in blds if x in completed])
                failure = len([x for x in blds if x in failed])
                if success > 0:
                    success = 100 * success / total
                else:
                    success = 0
                if failure > 0:
                    failure = 100 * failure / total
                else:
                    failure = 0
                pkgobj.success_rate = success
                pkgobj.failure_rate = failure

        if build_result is True:
            run_docker_clean(pkgobj.pkgname)

    if not status.queue and not status.hook_queue:
        remove('/opt/antergos-packages')
        status.idle = True
        status.building = 'Idle'
        status.now_building = 'Idle'
        status.container = ''
        status.building_num = ''
        status.building_start = ''
        status.iso_building = False
        logger.info('All builds completed.')
Example #7
def stop(path, machineName, host, environment):
    new_env = resetEnv(host, environment)
    logger.debug('Bring down {}'.format(path))
    old_path = os.getcwd()
    jobid = get_current_job().id
    try:
        os.chdir(path)
        _open_console(jobid)
        if machineName != '':
            _l = lambda line: _log_console(jobid, str(line))
            sh.vagrant('halt', machineName,
                       _ok_code=[0, 1, 2],
                       _out=_l, _err=_l,
                       _env=new_env).wait()
        else:
            _l = lambda line: _log_console(jobid, str(line))
            sh.vagrant('halt',
                       _ok_code=[0, 1, 2],
                       _out=_l, _err=_l,
                       _env=new_env).wait()
    except Exception:
        logger.error('Failed to shut down machine {}'.format(path),
                     exc_info=True)

    _close_console(jobid)
    os.chdir(old_path)
    # logger.debug('Done bring down {}'.format(path))
    return json.dumps(_get_status(path, host, environment))
Example #8
def provision(path, environment, machineName, host):
    new_env = resetEnv(host, environment)
    # logger.debug('Running provision on {} with env {}'
    #            .format(path, environment))
    old_path = os.getcwd()
    jobid = get_current_job().id
    try:
        os.chdir(path)
        _open_console(jobid)
        if machineName != '':
            _l = lambda line: _log_console(jobid, str(line))
            sh.vagrant('provision', machineName,
                       _ok_code=[0, 1, 2],
                       _out=_l, _err=_l,
                       _env=new_env).wait()
        else:
            _l = lambda line: _log_console(jobid, str(line))
            sh.vagrant('provision',
                       _ok_code=[0, 1, 2],
                       _out=_l, _err=_l,
                       _env=new_env).wait()
    except Exception:
        logger.error('Failed to provision machine at {}'.format(path),
                     exc_info=True)
    _close_console(jobid)
    os.chdir(old_path)
    return json.dumps(_get_status(path, host, environment))
Example #9
def run_tests(payload):
    #payload = get_payload(payload_id)
    job = get_current_job()

    # work out the repo_url
    repo_name = payload['repository']['name']
    owner = payload['repository']['owner']['name']
    repo_url = "[email protected]:%s/%s.git" % (owner, repo_name)

    update_progress(job, 'repo url: %s' % repo_url)
    logger.info("repo: %s" % repo_url)

    vpath = tempfile.mkdtemp(suffix="ridonkulous")

    logger.info("cloning repo %s to: %s" % (repo_url, vpath))
    update_progress(job, "cloning repo %s to: %s" % (repo_url, vpath))

    create_environment(vpath, site_packages=False)

    os.chdir(vpath)

    git.Git().clone(repo_url)
    os.chdir(os.path.join(vpath, repo_name))

    pip = "%s/bin/pip" % vpath
    #python = "%s/bin/python"
    nose = "%s/bin/nosetests" % vpath

    ret = subprocess.call(r'%s install -r requirements.txt --use-mirrors' % pip, shell=True)

    logger.info("running nose")
    ret = subprocess.call(r'%s' % nose, shell=True)
    logger.info(ret)
    update_progress(job, 'done')
    return 'ok'
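update_progress() is referenced but not included in this excerpt; a plausible minimal version, based only on how it is called here, would simply record the latest message in the job's meta:

def update_progress(job, message):
    # Assumed shape of the helper; the project's real implementation may differ.
    job.meta['progress'] = message
    job.save_meta()   # job.save() on older RQ releases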
Example #10
def archive(files):
    """
    argument expected:
    [
        {"path":"/path/to/file", "name","file_name_in_zip"},
        { ... },
        ...
    ]
    returns a s3 url
    """
    job = rq.get_current_job(get_redis())
    handle, tempname = tempfile.mkstemp()
    os.close(handle)
    with ZipFile(tempname, mode='w', compression=ZIP_DEFLATED) as zipfile:
        for file_ in files:
            zipfile.write(file_['path'], file_['name'])

    job.meta['size'] = si_unit(os.path.getsize(tempname))
    job.save()

    objname = str(uuid.uuid4()) + ".zip"

    s3 = boto3.resource('s3')
    s3.Bucket(bucket).upload_file(tempname, objname, ExtraArgs={'ContentType':'application/zip'})

    os.remove(tempname)

    url = "https://%s.s3.amazonaws.com/%s" % (bucket, objname)
    return url
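On the enqueuing side, the returned S3 URL becomes the job result and the archive size is available in meta. A minimal usage sketch with assumed inputs:

from redis import Redis
from rq import Queue

q = Queue(connection=Redis())
job = q.enqueue(archive, [
    {'path': '/tmp/report.pdf', 'name': 'report.pdf'},   # hypothetical file list
])

# After the worker finishes:
job.refresh()
if job.is_finished:
    print(job.result)             # the S3 URL returned by archive()
    print(job.meta.get('size'))   # the size recorded by the worker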
Example #11
def rsync(path, host, environment, machineName=None):
    new_env = resetEnv(host, environment)
    old_path = os.getcwd()
    os.chdir(path)
    try:
        jobid = get_current_job().id
        _open_console(jobid)
        _log_console(
            jobid,
            'Running rsync on machine {}.\n'.format(machineName)
        )

        _l = lambda line: _log_console(jobid, str(line))

        if machineName is not None:
            sh.vagrant('rsync', machineName,
                       _out=_l,
                       _err=_l,
                       _ok_code=[0, 1, 2],
                       _env=new_env).wait()
        else:
            sh.vagrant('rsync',
                       _out=_l,
                       _err=_l,
                       _ok_code=[0, 1, 2],
                       _env=new_env).wait()
        _log_console(
            jobid,
            'rsync is done running on machine {}.\n'.format(machineName))
        _close_console(jobid)
    except Exception:
        return json.dumps({'msg': 'error trying to run vagrant rsync'})
    finally:
        # Always restore the original working directory, even on failure.
        os.chdir(old_path)
    return json.dumps({'msg': 'rsync done'})
Example #12
    def _employees(self, company_name="", keyword=""):
        ''' Linkedin Scrape '''
        # TODO - add linkedin directory search
        args = '-inurl:"/dir/" -inurl:"/find/" -inurl:"/updates"'
        args = args+' -inurl:"job" -inurl:"jobs2" -inurl:"company"'
        qry = '"at {0}" {1} {2} site:linkedin.com'
        qry = qry.format(company_name, args, keyword)
        results = Google().search(qry, 10)
        results = results.dropna()
        results = Google()._google_df_to_linkedin_df(results)
        _name = '(?i){0}'.format(company_name)
        if " " in company_name:
            results['company_score'] = [fuzz.partial_ratio(_name, company) 
                                        for company in results.company]
        else:
            results['company_score'] = [fuzz.ratio(_name, company) 
                                        for company in results.company]
        if keyword != "":
            results['score'] = [fuzz.ratio(keyword, title) 
                                for title in results.title]
            results = results[results.score > 75]

        results = results[results.company_score > 64]
        results = results.drop_duplicates()
        data = {'data': results.to_dict('r'), 'company_name':company_name}
        CompanyExtraInfoCrawl()._persist(data, "employees", "")

        job = rq.get_current_job()
        print job.meta.keys()
        if "queue_name" in job.meta.keys():
          if RQueue()._has_completed(job.meta["queue_name"]):
            q.enqueue(Jigsaw()._upload_csv, job.meta["company_name"])
        return results
Example #13
    def update_repo(self):
        with Connection(db):
            current_job = get_current_job()
            if 'update_repo' != current_job.origin:
                logger.error('Only the repo worker can update repos!')
                return

        trans_running = status.transactions_running or status.transaction_queue
        building_saved = False
        excluded = [
            'Updating antergos repo database.',
            'Updating antergos-staging repo database.',
            'Processing developer review result.',
            'Checking remote package sources for changes.',
        ]

        if not status.idle and trans_running and status.current_status not in excluded:
            building_saved = status.current_status
        elif status.idle:
            status.idle = False

        msg = excluded[0] if 'antergos' == self.name else excluded[1]
        status.current_status = msg

        self._update_repo()

        trans_running = status.transactions_running or status.transaction_queue

        if building_saved and not status.idle and status.current_status == msg:
            status.current_status = building_saved

        elif status.idle or not trans_running:
            status.idle = True
            status.current_status = 'Idle.'
Example #14
 def get_csv_rows(self, queryset, type, model):
     data = [self.get_csv_header()]
     total = queryset.count()
     processed = 0
     job = get_current_job()
     for asset in queryset:
         row = ['part', ] if asset.part_info else ['device', ]
         for item in self.columns:
             field = item.field
             if field:
                 nested_field_name = item.foreign_field_name
                 if nested_field_name == type:
                     cell = self.get_cell(
                         getattr(asset, type), field, model
                     )
                 elif nested_field_name == 'part_info':
                     cell = self.get_cell(asset.part_info, field, PartInfo)
                 elif nested_field_name == 'venture':
                     cell = self.get_cell(asset.venture, field, Venture)
                 else:
                     cell = self.get_cell(asset, field, Asset)
                 row.append(unicode(cell))
         data.append(row)
         processed += 1
         if job:
             job.meta['progress'] = processed / float(total)
             if not job.meta.get('start_progress'):
                 job.meta['start_progress'] = datetime.datetime.now()
             job.save()
     if job:
         job.meta['progress'] = 1
         job.save()
     return data
Example #15
def save_assembly_job(assembly, fasta_path, calculate_fourmers,
                      search_genes, email=None, 
                      coverage_filename=None, bulk_size=5000):
    job = get_current_job()

    # Find essential genes
    essential_genes = None
    if search_genes:
        job.meta['status'] = 'Searching for essential genes per contig'
        job.save()
        essential_genes = find_essential_genes_per_contig(fasta_path)

    # Save contigs to database
    job.meta['status'] = 'Saving contigs'
    job.save()
    args = [assembly, fasta_path, calculate_fourmers, essential_genes, bulk_size]
    if coverage_filename is not None:
        samples, coverages = read_coverages(coverage_filename)
        args.append(coverages)
        assembly.samples = ','.join(samples)
    notfound = save_contigs(*args)
    job.meta['notfound'].extend(notfound)
    job.save()

    assembly.busy = False
    db.session.add(assembly)
    db.session.commit()

    if email:
        utils.send_completion_email(email, assembly.name)

    return {'assembly': assembly.id}
Example #16
def process_document(path, options, meta):
    current_task = get_current_job()
    with Office(app.config["LIBREOFFICE_PATH"]) as office: # acquire libreoffice lock
        with office.documentLoad(path) as original_document: # open original document
            with TemporaryDirectory() as tmp_dir: # create temp dir where output'll be stored
                for fmt in options["formats"]: # iterate over requested formats
                    current_format = app.config["SUPPORTED_FORMATS"][fmt]
                    output_path = os.path.join(tmp_dir, current_format["path"])
                    original_document.saveAs(output_path, fmt=current_format["fmt"])
                if options.get("thumbnails", None):
                    is_created = False
                    if meta["mimetype"] == "application/pdf":
                        pdf_path = path
                    elif "pdf" in options["formats"]:
                        pdf_path = os.path.join(tmp_dir, "pdf")
                    else:
                        pdf_tmp_file = NamedTemporaryFile()
                        pdf_path = pdf_tmp_file.name
                        original_document.saveAs(pdf_tmp_file.name, fmt="pdf")
                        is_created = True
                    image = Image(filename=pdf_path,
                                  resolution=app.config["THUMBNAILS_DPI"])
                    if is_created:
                        pdf_tmp_file.close()
                    thumbnails = make_thumbnails(image, tmp_dir, options["thumbnails"]["size"])
                result_path, result_url = make_zip_archive(current_task.id, tmp_dir)
        remove_file.schedule(
            datetime.timedelta(seconds=app.config["RESULT_FILE_TTL"]),
            result_path
        )
    return result_url
Example #17
 def get_csv_rows(self, queryset, type, model):
     data = [self.get_csv_header()]
     total = queryset.count()
     processed = 0
     job = get_current_job()
     for asset in queryset:
         row = ['part'] if asset.part_info else ['device']
         for item in self.columns:
             field = item.field
             if field:
                 nested_field_name = item.foreign_field_name
                 if nested_field_name == type:
                     cell = self.get_cell(
                         getattr(asset, type), field, model
                     )
                 elif nested_field_name == 'part_info':
                     cell = self.get_cell(asset.part_info, field, PartInfo)
                 elif nested_field_name == 'venture':
                     cell = self.get_cell(asset.venture, field, Venture)
                 elif nested_field_name == 'is_discovered':
                     cell = unicode(asset.is_discovered)
                 else:
                     cell = self.get_cell(asset, field, Asset)
                 row.append(unicode(cell))
         data.append(row)
         processed += 1
         set_progress(job, processed / float(total))
     set_progress(job, 1)
     return data
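set_progress() is not part of this excerpt; a minimal version consistent with the call sites above (an assumption, not the project's actual helper) could be:

def set_progress(job, progress):
    if job:                      # get_current_job() returns None outside a worker
        job.meta['progress'] = progress
        job.save_meta()          # job.save() on older RQ releases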
Example #18
def walk(client, metadata, bytes_read, total_bytes):
    job = get_current_job()
    dir_path = os.path.basename(metadata['path'])
    bytes = metadata['bytes']
    bytes_read += int(bytes)
    update_progress(job, float(bytes_read) / total_bytes, dir_path)

    result = {'name':os.path.basename(dir_path), 'children':[], 'value':bytes}

    if 'contents' in metadata:
        for dir_entry in metadata['contents']:
            path = dir_entry['path']
            # Skip hidden files, shit gets too rowdy
            if os.path.basename(path)[0] == '.':
                continue
            dir_entry_bytes = dir_entry['bytes']
            bytes_read += int(dir_entry_bytes)
            update_progress(job, float(bytes_read) / total_bytes, path)
            if dir_entry_bytes == 0:
                child, bytes_read = walk(client, get_metadata(client, path), bytes_read, total_bytes)
            else:
                child = {'name':os.path.basename(path), 'value':dir_entry_bytes}
            result['children'].append(child)
    #empty directories? do we care?
    if len(result['children']) == 0:
        _ = result.pop('children', None)
    return result, bytes_read
Example #19
def scan_address_job(
    ip_address=None,
    plugins=None,
    results=None,
    automerge=AUTOMERGE_MODE,
    called_from_ui=False,
    **kwargs
):
    """The function that is actually running on the worker."""

    job = rq.get_current_job()
    available_plugins = getattr(settings, 'SCAN_PLUGINS', {}).keys()
    if not plugins:
        plugins = available_plugins
    run_postprocessing = not (set(available_plugins) - set(plugins))
    if ip_address and plugins:
        if not kwargs:
            ip, created = IPAddress.concurrent_get_or_create(
                address=ip_address,
            )
            if not (ip.snmp_name and ip.snmp_community):
                message = "SNMP name/community is missing. Forcing autoscan."
                job.meta['messages'] = [
                    (ip_address, 'ralph.scan', 'info', message)
                ]
                job.save()
                autoscan_address(ip_address)
            kwargs = {
                'snmp_community': ip.snmp_community,
                'snmp_version': ip.snmp_version,
                'http_family': ip.http_family,
                'snmp_name': ip.snmp_name,
            }
        results = _run_plugins(ip_address, plugins, job, **kwargs)
    if run_postprocessing:
        _scan_postprocessing(results, job, ip_address)
        if automerge and job.meta.get('changed', True):
            # Run only when automerge mode is enabled and some change was
            # detected. When `change` state is not available just run it...
            save_job_results(job.id)
        elif not called_from_ui and job.args and job.meta.get('changed', True):
            # Run only when some change was detected. When `change` state is
            # not available just run it...
            try:
                ip_obj = IPAddress.objects.select_related().get(
                    address=job.args[0]  # job.args[0] == ip_address
                )
            except IPAddress.DoesNotExist:
                pass
            else:
                for plugin_name in getattr(
                    settings, 'SCAN_POSTPROCESS_ENABLED_JOBS', []
                ):
                    try:
                        module = import_module(plugin_name)
                    except ImportError as e:
                        logger.error(unicode(e))
                    else:
                        module.run_job(ip_obj)
    return results
Example #20
def _scan_address(address, plugins, **kwargs):
    """The function that is actually running on the worker."""

    job = rq.get_current_job()
    results = {}
    job.meta['messages'] = []
    job.meta['finished'] = []
    job.meta['status'] = {}
    for plugin_name in plugins:
        message = "Running plugin %s." % plugin_name
        job.meta['messages'].append((address, plugin_name, 'info', message))
        job.save()
        try:
            module = import_module(plugin_name)
        except ImportError as e:
            message = 'Failed to import: %s.' % e
            job.meta['messages'].append((address, plugin_name, 'error', message))
            job.meta['status'][plugin_name] = 'error'
        else:
            result = module.scan_address(address, **kwargs)
            results[plugin_name] = result
            for message in result.get('messages', []):
                job.meta['messages'].append((address, plugin_name, 'warning', message))
            job.meta['status'][plugin_name] = result.get('status', 'success')
        job.meta['finished'].append(plugin_name)
        job.save()
    return results
Example #21
    def process_and_save_build_metadata(self, version_str=None):
        """
        Initializes the build metadata.

        Args:
            pkg_obj (Package): Package object for the package being built.

        Returns:
            Build: A build object.

        """

        self.start_str = self.datetime_to_string(datetime.now())

        if version_str:
            self.version_str = version_str
        else:
            self.version_str = self._pkg_obj.version_str

        pkg_link = '<a href="/package/{0}">{0}</a>'.format(self._pkg_obj.pkgname)

        tpl = 'Build <a href="/build/{0}">{0}</a> for {1} <strong>{2}</strong> started.'

        tlmsg = tpl.format(self.bnum, pkg_link, self.version_str)

        get_timeline_object(msg=tlmsg, tl_type=3, ret=False)

        self._pkg_obj.builds.append(self.bnum)
        status.now_building.append(self.bnum)

        with Connection(self.db):
            current_job = get_current_job()
            current_job.meta['building_num'] = self.bnum
            current_job.save()
Example #22
File: manual.py Project: ar4s/ralph
def scan_address_job(
    ip_address=None,
    plugins=None,
    results=None,
    automerge=AUTOMERGE_MODE,
    **kwargs
):
    """
    The function that is actually running on the worker.
    """

    job = rq.get_current_job()
    available_plugins = getattr(settings, 'SCAN_PLUGINS', {}).keys()
    if not plugins:
        plugins = available_plugins
    run_postprocessing = not (set(available_plugins) - set(plugins))
    if ip_address and plugins:
        if not kwargs:
            ip, created = IPAddress.concurrent_get_or_create(
                address=ip_address,
            )
            kwargs = {
                'snmp_community': ip.snmp_community,
                'snmp_version': ip.snmp_version,
                'http_family': ip.http_family,
                'snmp_name': ip.snmp_name,
            }
        results = _run_plugins(ip_address, plugins, job, **kwargs)
    if run_postprocessing:
        _scan_postprocessing(results, job, ip_address)
        # Run only when automerge mode is enabled and some change was detected.
        # When `change` state is not available just run it...
        if automerge and job.meta.get('changed', True):
            save_job_results(job.id)
    return results
Example #23
 def test(self, company_name):
     job = rq.get_current_job()
     print job.meta.keys()
     if "queue_name" in job.meta.keys():
       print RQueue()._has_completed(job.meta["queue_name"])
       print RQueue()._has_completed("queue_name")
       if RQueue()._has_completed(job.meta["queue_name"]):
         q.enqueue(Jigsaw()._upload_csv, job.meta["company_name"])
Example #24
def nhmmer_search(sequence, description):
    """
    RQ worker function.
    """
    job = get_current_job()
    save_query(sequence, job.id, description)
    filename = NhmmerSearch(sequence=sequence, job_id=job.id)()
    save_results(filename, job.id)
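The job id doubles as the key under which save_query() and save_results() store their data, so the caller typically keeps it to poll the search. A minimal sketch with an assumed queue name and inputs:

from redis import Redis
from rq import Queue

q = Queue('nhmmer', connection=Redis())    # hypothetical queue name
job = q.enqueue(nhmmer_search, 'ACGUACGUACGU', 'test query')
print(job.id)            # lookup key for the stored query and results
print(job.get_status())  # queued / started / finished / failed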
Example #25
def create_container(d_os):
    num = d_os['num_instance']
    if d_os['ct_type'] == "docker" and already_running(d_os['username'], d_os['code']):
        print "Ignore ct_create request"
        return

    print (" -->>Running for user " + str(d_os['username']) + " with ct_type " +
           str(d_os['ct_type']) + " uptime is: " + str(d_os['container_uptime']))
    cur_job = get_current_job()
    cur_job.meta['ownername'] = str(d_os['username'])
    cur_job.save()
    cur_job.refresh()

    while num > 0:
        if d_os['ct_type'] == "openvz":
            cmd = "vzctl create " + d_os['cid'] + " --ostemplate " + d_os['ostemplate']
        elif d_os['ct_type'] == "aws_vm":
            ec2_conn = spoty.ec2_connect()
            config_entry = spoty.read_conf_file(d_os['repo'])  # Read distro-specific config file.
            cur_job.meta['request_status'] = "Reading config files"
            cur_job.save()
            cur_job.refresh()
            spot, bdm = spoty.req_instance_and_tag(ec2_conn, config_entry)
            cur_job.meta['request_status'] = "Creating VM"
            cur_job.save()
            cur_job.refresh()
            instance = spoty.set_bdm(spot, bdm, ec2_conn, config_entry)
            cur_job.meta['request_status'] = "Booting VM"
            cur_job.save()
            cur_job.refresh()
            # Push the new instance and connection into d_os
            d_os['instance'] = instance
            d_os['ec2_conn'] = ec2_conn
            cmd = "uname -a"
        else:
            d_os['repo_vers'] = '2'
            if d_os['code'] == 1:
                d_os['repo_vers'] = '3'
                d_os['container_uptime'] = 3600
            cmd = ("docker run --user wmuser --name " + d_os['username'] + str(d_os['code']) + ' ' +
                   d_os['options'] + d_os['port'] + d_os['repo'] + d_os['repo_vers'] + d_os['ct_cmd'])
        print "Starting.."
        print cmd
        out = check_output(shlex.split(cmd))
        print "Output is:"
        print out
        d_os['imgid'] = out.rstrip()
        num -= 1
        if d_os['code'] == 1:
            programmingsite.movedata_host2ct(d_os)

    if d_os['proceed_nextq']:
        with Connection(Redis()):
            q = Queue('setupq', default_timeout=15000)
            job = q.enqueue_call(func=setup_container, args=(d_os,), result_ttl=600)
            cur_job.meta['request_status'] = "Install Software"
            cur_job.meta['setupq_jobid'] = job.id
            cur_job.save()
            cur_job.refresh()
            print cur_job.meta
Example #26
 def unregister_dirty(self, decrement=1):
     """Unregister current TreeItem as dirty
     (should be called from RQ job procedure after cache is updated)
     """
     r_con = get_connection()
     job = get_current_job()
     logger.debug('UNREGISTER %s (-%s) where job_id=%s' %
                  (self.get_cachekey(), decrement, job.id))
     r_con.zincrby(POOTLE_DIRTY_TREEITEMS, self.get_cachekey(), 0 - decrement)
Example #27
def send_message(**params):
    """
        Tries to send the message with specified parameters & number of retries
        
        Args:
            to (list) - List of emails to send the message to
            from_email (str) - Email to send the message on behalf of
            subject (str) - Subject of the message
            text (str) - Main text that should go in the body of the message
            cc (list) - Optional; list of emails to send the message to, with the 'cc' header
            bcc (list) - Optional; list of emails to send the message to, with the 'bcc' header
            retries (int) - Optional; number of times each Mailer implementation should try to send the message
    
            All email fields are as specified in RFC-822
    """
    retries = params.get('retries', 1) #By default retry 1 time
    
    # TODO: Random shuffling is a crude load-balancing method. Ideally we may want to consider
    # the number of requests to send message made to each Mailer and route new requests accordingly.
    mailers = get_available_mailers()
    shuffle(mailers)

    #TODO: Check if rq has any inbuilt retry mechanism that can be leveraged
    while retries >= 0:
        for mailer in mailers:
            try:
                messages_info = mailer.send_message(**params)
                
                job = get_current_job()
                job.meta['handled_by'] = mailer.__class__.__name__
                job.meta['messages_info'] = messages_info
                job.save()

                # TODO: Use a better way to store status info & metadata for it
                return

            except MailNotSentException as e:
                # TODO: Use logging here to log details of why this mail wasn't sent using
                # e.message & e.status_code. Also, add more details to MailNotSentException
                # if required
                pass
            
            except ConnectTimeout as e:
                # TODO: log
                pass
            
            # Catch other Exceptions that can be thrown here
            
            except Exception as e:
                # If the send_message method fails for any reason whatsoever, we want to use the
                # next Mailer.
                # TODO: Log. These logs will be very important as they'll let us know about failures
                # we're not anticipating
                pass

        retries = retries - 1
Example #28
def _set_task_progress(task):
    """
    This method will update the job progress using the task object
    :param task : Task
    :return:
    """
    job = get_current_job()
    if job:
        job.meta['progress'] = task.export()
        job.save_meta()
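The web-side counterpart reads that meta back by job id; a minimal sketch, assuming a default Redis connection (not from the source):

from redis import Redis
from rq.job import Job

def get_task_progress(job_id):
    job = Job.fetch(job_id, connection=Redis())
    job.refresh()                      # pick up meta saved by the worker
    return job.meta.get('progress')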
Example #29
 def unregister_all_dirty(self, decrement=1):
     """Unregister current TreeItem and all parent paths as dirty
     (should be called from RQ job procedure after cache is updated)
     """
     r_con = get_connection()
     job = get_current_job()
     for p in self.all_pootle_paths():
         logger.debug('UNREGISTER %s (-%s) where job_id=%s' %
                      (p, decrement, job.id))
         r_con.zincrby(POOTLE_DIRTY_TREEITEMS, p, 0 - decrement)
Example #30
def create_zim(settings, options):
    """Call the zim creator and the mailer when it is finished.
    """
    job = get_current_job()
    log_dir = settings.get('zimit.logdir', '/tmp')
    log_file = os.path.join(log_dir, "%s.log" % job.id)
    zim_creator = load_from_settings(settings, log_file)
    zim_file = zim_creator.create_zim_from_website(options['url'], options)
    output_url = settings.get('zimit.output_url')
    zim_url = urlparse.urljoin(output_url, zim_file)
    send_zim_url(settings, options['email'], zim_url)
Example #31
def analyze_user(user_id=None, user_tag=None, max_friends=20):
    try:
        userm = User.query.get(user_id)
        steps = 5
        # GET MAIN USER
        current_step = 1
        _set_task_progress(current_step,
                           'Obteniendo informacion de {}'.format(user_tag))
        app.logger.info("Solicitando analisis de {}".format(user_tag))
        user = get_user(user_name=user_tag)

        # GET FRIENDS IDS
        _set_task_progress(int(current_step * 100 / steps),
                           'Obteniendo lista de amigos')
        friends_ids = get_user_friends(user_name=user['screen_name'])
        current_step += 1

        # Fixed steps + number of friends to retrieve + max_friends (if there are more than 20) to analyze.
        # For each friend we have to fetch tweets, get the personality profile and compare it to the user with the distance metric.
        friends_n = len(friends_ids)
        if friends_n < 20:
            steps += friends_n + (friends_n * 3)
        else:
            steps += friends_n + (max_friends * 3)
        current_step += 1
        # GET USER TWEETS LIST
        _set_task_progress(int(current_step * 100 / steps),
                           'Obteniendo tweets de {}'.format(user['user_name']))
        user_tweets = get_user_tweets(user_name=user['screen_name'])
        current_step += 1
        # GET USER PERSONALITIES
        _set_task_progress(
            int(current_step * 100 / steps),
            'Obteniendo perfil de personalidad de {}'.format(
                user['user_name']))
        user_personality = get_user_personality(user_tweets)
        current_step += 1

        friends_list = list()
        for friend in friends_ids:
            # GET SPECIFIC FRIEND
            f = get_user(user_id=friend)  # user dict
            _set_task_progress(
                int(current_step * 100 / steps),
                'Obteniendo informacion de {}'.format(f['user_name']))
            friends_list.append(f)
            current_step += 1
        # SORT LIST OF FRIENDS BY POST COUNT
        friends_list = sorted(friends_list,
                              key=lambda k: k['user_tweets'],
                              reverse=True)
        # ONLY MOST RELEVANT MAX_FRIENDS
        friends_list = friends_list[:max_friends]

        # GET FRIENDS INFO
        friends_data = list()
        for friend in friends_list:
            # GET FRIEND TWEETS
            _set_task_progress(
                int(current_step * 100 / steps),
                'Obteniendo tweets de {}'.format(friend['user_name']))
            friend_tweets = get_user_tweets(user_id=friend['user_id'])
            current_step += 1
            # GET FRIEND PERSONALITY
            _set_task_progress(
                int(current_step * 100 / steps),
                'Obteniendo perfil de personalidad de {}'.format(
                    friend['user_name']))
            friend_personality = get_user_personality(friend_tweets)
            friend_s = {
                'name': friend['screen_name'],
                'personality': friend_personality,
                'tweets_count': friend['user_tweets'],
                'avatar': friend['avatar']
            }
            friends_data.append(friend_s)
            current_step += 1

        # OBTAIN DISTANCE METRICS
        for friend in friends_data:
            _set_task_progress(
                int(current_step * 100 / steps),
                'Comparando {} con {}'.format(user['user_name'],
                                              friend['name']))
            distance = manhattan_distance(user_personality,
                                          friend['personality'])
            friend['distance'] = distance
            current_step += 1

        # SORT FRIENDS LIST ACCORDING TO DISTANCE
        friends_data = sorted(friends_data, key=lambda k: k['distance'])

        _set_task_progress(100, 'Analisis completo')
        user_analysis = {
            'user_name': user['user_name'],
            'user_screen_name': user['screen_name'],
            'user_personality': user_personality,
            'user_tweets_count': user['user_tweets'],
            'user_friends_count': user['user_friends'],
            'user_avatar': user['avatar'],
            'user': 1,
            'friends': friends_data
        }
        job = get_current_job()
        result = Result(id=job.get_id(), result=json.dumps(user_analysis))
        db.session.add(result)
        db.session.commit()

        # SEND RESULTS EMAIL
        send_email('[ourPersonalities] Análisis completado',
                   sender=app.config['LYRADMIN'][0],
                   recipients=[userm.email],
                   text_body=render_template('email/task_completed.txt',
                                             user=userm,
                                             task=job.get_id()),
                   html_body=render_template('email/task_completed.html',
                                             user=userm,
                                             task=job.get_id()))
        # Sleeping 'til api request cools down
        time.sleep(60 * 15)
    except Exception:
        _set_task_progress(500, 'Analisis fallido')
        app.logger.error('Unhandled exception', exc_info=sys.exc_info())
        print("error")
Example #32
def convert(instance, input_file, output_folder, priority, encoding_profile):

    errors = []

    profiles = {
        "240p": {
            "width": 426,
            "vb": 300,
            "mb": 300,
            "bs": 600
        },
        "360p": {
            "width": 640,
            "vb": 350,
            "mb": 350,
            "bs": 700
        },
        "480p": {
            "width": 854,
            "vb": 500,
            "mb": 500,
            "bs": 1000
        },
        "720p": {
            "width": 1280,
            "vb": 1000,
            "mb": 1000,
            "bs": 2000
        },
        "1080p": {
            "width": 1920,
            "vb": 2000,
            "mb": 2000,
            "bs": 4000
        }
    }

    job = get_current_job()
    print("Current job: {}".format(job.id))

    # Points to 'EdulearnNetUpload' folder
    vidcon_root = app.config['VIDCON_ROOT']

    # Check if directory is mounted properly
    if os.path.exists(vidcon_root):
        if len(os.listdir(vidcon_root)) == 0:
            #TODO mount sequence for OSX
            subprocess.call(["sudo", "mount", "/media/edulearnupload/"])

    # E.g /Volumes/EdulearnNETUpload/asknlearn/vidcon/input/small_Sample.mp4
    input_file_absolute_path = os.path.join(vidcon_root + instance + '/' +
                                            input_file)

    # E.g /Volumes/EdulearnNETUpload/asknlearn/vidcon/output/
    output_folder_absolute_path = os.path.join(vidcon_root + instance + '/' +
                                               output_folder)

    # E.g small_Sample.mp4, derived from the given input filename
    output_filename = os.path.split(input_file)[1]

    # Split a filename into its name and extension:
    # E.g output_file = small_Sample, output_file_extension = .mp4
    output_file, output_file_extension = os.path.splitext(output_filename)
    # Force extension to be ".mp4"
    output_file_extension = ".mp4"

    # Rename the file to include the profile that it was encoded under
    # E.g small_Sample_240p.mp4
    new_output_filename = output_file + "_" + encoding_profile + output_file_extension

    new_output_file = os.path.join(output_folder_absolute_path + "/" +
                                   new_output_filename)

    profile = profiles[encoding_profile]

    ffmpeg_cmd = """
        ffmpeg -i '{0}' -codec:v libx264 -profile:v high -preset slow -b:v {1}k -maxrate {2}k 
               -bufsize {3}k -vf scale={4}:trunc(ow/a/2)*2 -threads 0 
               -codec:a mp3 -b:a 64k -y '{5}'""".format(
        input_file_absolute_path, str(profile['vb']), str(profile['mb']),
        str(profile['bs']), str(profile['width']), new_output_file)

    std_err = ""
    std_in = ""
    output = ""

    try:
        output = subprocess.check_output(shlex.split(ffmpeg_cmd))
        print("Success: {}", output)
    except subprocess.CalledProcessError as ex:

        p = subprocess.Popen(shlex.split(ffmpeg_cmd),
                             bufsize=2048,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

        # communicate() returns (stdout, stderr)
        std_out, std_err = map(
            lambda b: b.decode('utf-8').replace(os.linesep, '\n'),
            p.communicate((os.linesep).encode('utf-8')))

        print("std_out: {}".format(std_out))
        print("std_err: {}".format(std_err))

        raise GazzaThinksItFailedError("""
                                        \n\nreturn_code: \n{}\n\nffmpeg_cmd: \n{}\n\noutput: \n{}\n\n
                                        """.format(ex.returncode,
                                                   ffmpeg_cmd.strip(),
                                                   std_err.strip()))
    finally:
        print("{}".format(input_file_absolute_path))
        print("{}".format(new_output_file))
Example #33
File: jobs.py Project: mx2x/upribox
def job_message(message):
    job = get_current_job(connection=django_rq.get_connection())
    if not job.meta.get('messages'):
        job.meta['messages'] = deque()
    job.meta['messages'].append(message)
    job.save()
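A consumer outside the worker can read the accumulated messages back through the same django_rq connection; a minimal sketch (an assumption, not part of the project):

import django_rq
from rq.job import Job

def get_job_messages(job_id):
    job = Job.fetch(job_id, connection=django_rq.get_connection())
    job.refresh()
    return list(job.meta.get('messages', []))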
Example #34
def update_background(course_id, extension_dict):
    """
    Update time on selected students' quizzes to a specified percentage.

    :param course_id: The Canvas ID of the Course to update in
    :type course_id: int
    :param extension_dict: A dictionary that includes the percent of
        time and a list of canvas user ids.

        Example:
        {
            'percent': '300',
            'user_ids': [
                '0123456',
                '1234567',
                '9867543',
                '5555555'
            ]
        }
    :type extension_dict: dict
    """
    job = get_current_job()

    update_job(job, 0, "Starting...", "started")

    with app.app_context():
        if not extension_dict:
            update_job(job, 0, "Invalid Request", "failed", error=True)
            logger.warning("Invalid Request: {}".format(extension_dict))
            return job.meta

        try:
            course_json = get_course(course_id)
        except requests.exceptions.HTTPError:
            update_job(job, 0, "Course not found.", "failed", error=True)
            logger.exception("Unable to find course #{}".format(course_id))
            return job.meta

        course_name = course_json.get("name", "<UNNAMED COURSE>")

        user_ids = extension_dict.get("user_ids", [])
        percent = extension_dict.get("percent", None)

        if not percent:
            update_job(job,
                       0,
                       "`percent` field required.",
                       "failed",
                       error=True)
            logger.warning("Percent field not provided. Request: {}".format(
                extension_dict))
            return job.meta

        course, created = get_or_create(db.session,
                                        Course,
                                        canvas_id=course_id)
        course.course_name = course_name
        db.session.commit()

        for user_id in user_ids:
            try:
                canvas_user = get_user(course_id, user_id)

                sortable_name = canvas_user.get("sortable_name",
                                                "<MISSING NAME>")
                sis_id = canvas_user.get("sis_user_id")

            except requests.exceptions.HTTPError:
                # Unable to find user. Log and skip them.
                logger.warning("Unable to find user #{} in course #{}".format(
                    user_id, course_id))
                continue

            user, created = get_or_create(db.session, User, canvas_id=user_id)

            user.sortable_name = sortable_name
            user.sis_id = sis_id

            db.session.commit()

            # create/update extension
            extension, created = get_or_create(db.session,
                                               Extension,
                                               course_id=course.id,
                                               user_id=user.id)
            extension.percent = percent

            db.session.commit()

        quizzes = get_quizzes(course_id)
        num_quizzes = len(quizzes)
        quiz_time_list = []
        unchanged_quiz_time_list = []

        if num_quizzes < 1:
            update_job(
                job,
                0,
                "Sorry, there are no quizzes for this course.",
                "failed",
                error=True,
            )
            logger.warning(
                "No quizzes found for course {}. Unable to update.".format(
                    course_id))
            return job.meta

        for index, quiz in enumerate(quizzes):
            quiz_id = quiz.get("id", None)
            quiz_title = quiz.get("title", "[UNTITLED QUIZ]")

            comp_perc = int(((float(index)) / float(num_quizzes)) * 100)
            updating_str = "Updating quiz #{} - {} [{} of {}]"
            update_job(
                job,
                comp_perc,
                updating_str.format(quiz_id, quiz_title, index + 1,
                                    num_quizzes),
                "processing",
                error=False,
            )

            extension_response = extend_quiz(course_id, quiz, percent,
                                             user_ids)

            if extension_response.get("success", False) is True:
                # add/update quiz
                quiz_obj, created = get_or_create(db.session,
                                                  Quiz,
                                                  canvas_id=quiz_id,
                                                  course_id=course.id)
                quiz_obj.title = quiz_title
                quiz_obj.time_limit = quiz.get("time_limit")

                db.session.commit()

                added_time = extension_response.get("added_time", None)
                if added_time is not None:
                    quiz_time_list.append({
                        "title": quiz_title,
                        "added_time": added_time
                    })
                else:
                    unchanged_quiz_time_list.append({"title": quiz_title})
            else:
                update_job(
                    job,
                    comp_perc,
                    extension_response.get("message",
                                           "An unknown error occured."),
                    "failed",
                    error=True,
                )
                logger.error("Extension failed: {}".format(extension_response))
                return job.meta

        msg_str = (
            "Success! {} {} been updated for {} student(s) to have {}% time. "
            "{} {} no time limit and were left unchanged.")

        message = msg_str.format(
            len(quiz_time_list),
            "quizzes have" if len(quiz_time_list) != 1 else "quiz has",
            len(user_ids),
            percent,
            len(unchanged_quiz_time_list),
            "quizzes have"
            if len(unchanged_quiz_time_list) != 1 else "quiz has",
        )

        update_job(job, 100, message, "complete", error=False)
        job.meta["quiz_list"] = quiz_time_list
        job.meta["unchanged_list"] = unchanged_quiz_time_list
        job.save()

        return job.meta
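update_job() is not shown in this excerpt; a minimal sketch that matches how it is called above (the meta field names are assumptions) might look like:

def update_job(job, percent, status_msg, status, error=False):
    # Assumed meta layout; the real helper may store different keys.
    job.meta['percent'] = percent
    job.meta['status_msg'] = status_msg
    job.meta['status'] = status
    job.meta['error'] = error
    job.save()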
Example #35
def invoke_iap_analysis(analysis_id, timestamp_id, username, task_key,
                        experiment_iap_id=None):
    """
    This method represents an RQ job workload. It should be enqueued into the RQ analysis queue and processed by a corresponding worker.

    Handles the invocation of data analysis in IAP on the IAP server and fetches the result information afterwards.
    The received information is then entered into the database accordingly.

    The experiment ID has to be either passed in directly or stored in the result
    of a job that this one depends on, under the key 'response.experiment_iap_id'.

    :param analysis_id: The ID of the :class:`~server.models.analysis_model.AnalysisModel`
    :param timestamp_id: The ID of the :class:`~server.models.timestamp_model.TimestampModel` instance which should be analyzed
    :param username: The username of the user invoking this job
    :param task_key: The key used to fetch the corresponding AnalysisTask
    :param experiment_iap_id: The IAP ID of this experiment. If this is None, the job will assume that the job it depended on
        has returned the experiment ID in its response object with the key 'experiment_iap_id'

    :return: A dict containing the 'result_id' from IAP, the used 'pipeline_id', 'started_at' and 'finished_at' timestamps.
        (All nested inside the 'response' key)
    """
    print('EXECUTE ANALYSIS')
    job = get_current_job()
    log_store = get_log_store()
    task = AnalysisTask.from_key(get_redis_connection(), task_key)
    channel = get_grpc_channel()
    iap_stub = phenopipe_iap_pb2_grpc.PhenopipeIapStub(channel)
    pipe_stub = phenopipe_pb2_grpc.PhenopipeStub(channel)
    if experiment_iap_id is None:
        experiment_iap_id = job.dependency.result['response']['experiment_iap_id']

    log_store.put(job.id, 'Started Analysis Job', 0)
    task.update_message('Started Analysis Job')
    session = get_session()
    # TODO Consider DB errors
    analysis = session.query(AnalysisModel).get(analysis_id)
    started_at = datetime.utcnow()
    analysis.started_at = started_at
    session.commit()
    try:
        response = iap_stub.AnalyzeExperiment(
            phenopipe_iap_pb2.AnalyzeRequest(experiment_id=experiment_iap_id, pipeline_id=analysis.pipeline_id)
        )
        remote_job_id = response.job_id
        request = phenopipe_pb2.WatchJobRequest(
            job_id=remote_job_id
        )
        status = pipe_stub.WatchJob(request)
        for msg in status:
            log_store.put(job.id, msg.message.decode('string-escape'), msg.progress)

        response = iap_stub.FetchAnalyzeResult(
            phenopipe_pb2.FetchJobResultRequest(job_id=remote_job_id)
        )
        finished_at = datetime.utcnow()

        analysis.iap_id = response.result_id
        analysis.finished_at = finished_at
        session.commit()
        log_store.put(job.id, 'Finished Analysis Job', 100)
        task.update_message('Finished Analysis Job')
        return create_return_object(JobType.iap_analysis, timestamp_id,
                                    {'result_id': response.result_id, 'started_at': started_at,
                                     'finished_at': finished_at, 'pipeline_id': analysis.pipeline_id})
    except grpc.RpcError as e:
        session.delete(session.query(AnalysisModel).get(analysis.id))
        session.commit()
        log_store.put(job.id, e.details(), 0)
        task.update_message('Analysis Job Failed')
        raise
Example #36
def setup_template_task(template_id, name, user, password, cores, memory):
    with app.app_context():
        job = get_current_job()
        proxmox = connect_proxmox()
        starrs = connect_starrs()
        db = connect_db()
        print("[{}] Retrieving template info for template {}.".format(
            name, template_id))
        template = get_template(db, template_id)
        print("[{}] Cloning template {}.".format(name, template_id))
        job.meta['status'] = 'cloning template'
        job.save_meta()
        vmid, mac = clone_vm(proxmox, template_id, name, user)
        print("[{}] Registering in STARRS.".format(name))
        job.meta['status'] = 'registering in STARRS'
        job.save_meta()
        ip = get_next_ip(starrs, app.config['STARRS_IP_RANGE'])
        register_starrs(starrs, name, app.config['STARRS_USER'], mac, ip)
        get_vm_expire(db, vmid, app.config['VM_EXPIRE_MONTHS'])
        print("[{}] Setting CPU and memory.".format(name))
        job.meta['status'] = 'setting CPU and memory'
        job.save_meta()
        vm = VM(vmid)
        vm.set_cpu(cores)
        vm.set_mem(memory)
        print(
            "[{}] Waiting for STARRS to propogate before starting VM.".format(
                name))
        job.meta['status'] = 'waiting for STARRS'
        job.save_meta()
        time.sleep(90)
        print("[{}] Starting VM.".format(name))
        job.meta['status'] = 'starting VM'
        job.save_meta()
        vm.start()
        print("[{}] Waiting for VM to start before SSHing.".format(name))
        job.meta['status'] = 'waiting for VM to start'
        job.save_meta()
        time.sleep(20)
        print("[{}] Creating SSH session.".format(name))
        job.meta['status'] = 'creating SSH session'
        job.save_meta()
        client = paramiko.SSHClient()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        retry = 0
        while retry < 30:
            try:
                client.connect(ip,
                               username=template['username'],
                               password=template['password'])
                break
            except Exception:
                retry += 1
                time.sleep(3)
        print("[{}] Running user creation commands.".format(name))
        job.meta['status'] = 'running user creation commands'
        job.save_meta()
        stdin, stdout, stderr = client.exec_command("useradd {}".format(user))
        exit_status = stdout.channel.recv_exit_status()
        root_password = gen_password(32)
        stdin, stdout, stderr = client.exec_command(
            "echo '{}' | passwd root --stdin".format(root_password))
        exit_status = stdout.channel.recv_exit_status()
        stdin, stdout, stderr = client.exec_command(
            "echo '{}' | passwd '{}' --stdin".format(password, user))
        exit_status = stdout.channel.recv_exit_status()
        stdin, stdout, stderr = client.exec_command(
            "passwd -e '{}'".format(user))
        exit_status = stdout.channel.recv_exit_status()
        stdin, stdout, stderr = client.exec_command(
            "echo '{} ALL=(ALL:ALL) ALL' | sudo EDITOR='tee -a' visudo".format(
                user))
        exit_status = stdout.channel.recv_exit_status()
        client.close()
        print("[{}] Template successfully provisioned.".format(name))
        job.meta['status'] = 'completed'
        job.save_meta()
Example #37
def preprocess(job):
    jb = get_current_job()
    print('Current job: %s' % (jb.id, ))
    try:
        job.type = 'preprocess'
        job.init_storage()
        dt = datetime.now()
        jb.meta['job_exec_time'] = str(int(dt.timestamp() * 1000))
        job.init_temp(jb.id)
        job.init_labels()
        job.jb = jb
        job.traincoco = {
            "info": {
                "description": "COCO 2017 Dataset",
                "url": "http://cocodataset.org",
                "version": "1.0",
                "year": 2018,
                "contributor": "COCO Consortium",
                "date_created": "2017/09/01"
            },
            "licenses": [],
            "images": [],
            "categories": [],
            "annotations": [],
        }
        job.testcoco = {
            "info": {
                "description": "COCO 2017 Dataset",
                "url": "http://cocodataset.org",
                "version": "1.0",
                "year": 2018,
                "contributor": "COCO Consortium",
                "date_created": "2017/09/01"
            },
            "licenses": [],
            "images": [],
            "categories": [],
            "annotations": [],
        }

        if hasattr(job, 'aug') and job.aug:
            jb.meta['steps'] = 6
        else:
            jb.meta['steps'] = 5
        jb.meta['current_step_processed'] = 0
        jb.meta['current_step_name'] = 'prep_existing_images'
        jb.meta['current_step'] = 0
        jb.save_meta()
        process_json(job)
        jb.meta['current_step_processed'] = 0
        jb.meta['current_step_size'] = 1
        jb.meta['current_step_name'] = 'create_tag_labels'
        jb.meta['current_step'] += 1

        create_label_pbtxt(job)
        jb.meta['current_step_size'] = 0
        jb.meta['current_step_name'] = 'create_training_corpus'
        jb.meta['current_step'] += 1
        jb.save_meta()
        create_tf_example(job)
        jb.meta['current_step_size'] = 0
        jb.meta['current_step_name'] = 'create_testing_corpus'
        jb.meta['current_step'] += 1
        jb.save_meta()
        create_tf_example(job, False)
        jb.meta['current_step_size'] = 0
        jb.meta['current_step_name'] = 'cleaning_up'
        jb.meta['current_step'] += 1
        jb.save_meta()
        delete_staged(job)
        upload_metadata(job)
        jb.meta['current_step_size'] = 0
        jb.meta['current_step_name'] = 'done'
        jb.meta['current_step'] += 1
        dt = datetime.now()
        jb.meta['job_end_time'] = str(int(dt.timestamp() * 1000))
        jb.save_meta()
        job.upload_data(job.to_json_string(),
                        'jobs/finished/{}_{}_preprocess_d_{}.json'.format(
                            str(job.start_time), str(job.end_time), jb.id),
                        contentType='application/json')
        return job
    except:
        var = traceback.format_exc()
        dt = datetime.now()
        job.end_time = int(dt.timestamp() * 1000)
        jb.meta['job_exception'] = var
        job.exception = var
        try:
            job.upload_data(job.to_json_string(),
                            'jobs/failed/{}_{}_preprocess_f_{}.json'.format(
                                str(job.start_time), str(job.end_time), jb.id),
                            contentType='application/json')
        except:
            pass
        jb.save_meta()
        raise
    finally:
        try:
            ct = 'd'
            if hasattr(job, 'exception'):
                ct = 'f'
            job.upload_data(job.to_json_string(),
                            'jobs/all/{}_{}_preprocess_{}_{}.json'.format(
                                str(job.start_time), str(job.end_time), ct,
                                jb.id),
                            contentType='application/json')
        except:
            pass
        try:

            job.delete_cloud_file(
                'jobs/running/{}_0_preprocess_r_{}.json'.format(
                    str(job.start_time), jb.id))
        except:
            pass
        try:
            job.delete_cloud_file('jobs/all/{}_0_preprocess_r_{}.json'.format(
                str(job.start_time), jb.id))
        except:
            pass
        job.cleanup()
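
The meta fields maintained above ('steps', 'current_step', 'current_step_name') lend themselves to a simple progress read-out on the caller side. A hedged sketch; the field names mirror the snippet and nothing else is assumed:

from redis import Redis
from rq.job import Job

def preprocess_progress(job_id):
    """Return (percent_complete, step_name) based on the meta written by preprocess()."""
    job = Job.fetch(job_id, connection=Redis())
    job.refresh()
    steps = job.meta.get('steps') or 1
    current = job.meta.get('current_step', 0)
    name = job.meta.get('current_step_name', 'unknown')
    return int(100 * current / steps), name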
Example #38
0
def access_self():
    return get_current_job().id
Example #39
0
def modify_self_and_error(meta):
    j = get_current_job()
    j.meta.update(meta)
    j.save()
    return 1 / 0
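
Because the meta is saved before the ZeroDivisionError is raised, it survives the failure and can still be read from the failed job. A sketch of how a test might verify that, assuming modify_self_and_error is importable from the module the worker loads (the import path below is hypothetical):

from redis import Redis
from rq import Queue
from rq.job import Job
from tasks import modify_self_and_error  # hypothetical module path

redis_conn = Redis()
q = Queue(connection=redis_conn)
job = q.enqueue_call(func=modify_self_and_error, args=({'marker': 'before-crash'},))

# ... once a worker has picked up (and failed) the job ...
job = Job.fetch(job.id, connection=redis_conn)
job.refresh()
assert job.is_failed
assert job.meta.get('marker') == 'before-crash'  # meta written before the crash is preserved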
Example #40
0
def export_clients():
    global file_name
    try:
        job = get_current_job()
        clients = Client.query.all()
        total_clients = Client.query.count()

        # _set_task_progress(0)
        # i = 0

        file_name = 'clients' + "-" + str(time.time()) + '.xlsx'
        workbook = xlsxwriter.Workbook(Config.EXCEL_FOLDER + file_name)
        # Add a bold format to use to highlight cells.
        bold = workbook.add_format({'bold': True})

        worksheet = workbook.add_worksheet()

        date_format = workbook.add_format({'num_format': 'mmmm d yyyy'})

        # Start from the first cell. Rows and columns are zero indexed.
        row = 1
        col = 0

        worksheet.write('A1', 'Name', bold)
        worksheet.write('B1', 'Email', bold)
        worksheet.write('C1', 'Contact', bold)
        worksheet.write('D1', 'Building', bold)
        worksheet.write('E1', 'House number', bold)
        worksheet.write('F1', 'Package', bold)
        worksheet.write('G1', 'Last payment date', bold)
        worksheet.write('H1', 'Due date', bold)

        worksheet.set_column('A:B', 30)
        worksheet.set_column('B:C', 30)
        worksheet.set_column('C:D', 30)
        worksheet.set_column('D:E', 30)
        worksheet.set_column('E:F', 30)
        worksheet.set_column('F:G', 20)
        worksheet.set_column('G:H', 30)
        worksheet.set_column('H:I', 30)

        for client in clients:
            service = Service.query.filter_by(client_id=client.id).first()
            # Query has no .last(); fetch the most recent payment explicitly
            payment = Payment.query.filter_by(client_id=client.id).order_by(
                Payment.date.desc()).first()

            name = client.client_user.fullname()
            email = client.client_user.email
            contact = client.client_user.phone
            building = client.building
            house = client.house
            if service:
                package = service.service_tariff.name
            else:
                package = None

            worksheet.write(row, col, name)
            worksheet.write(row, col + 1, email)
            worksheet.write(row, col + 2, contact)
            worksheet.write(row, col + 3, building)
            worksheet.write(row, col + 4, house)
            worksheet.write(row, col + 5, package)
            if payment:
                last_payment = payment.date
                due_date = last_payment + relativedelta(months=+1, days=-1)
                worksheet.write_datetime(row, col + 6, last_payment,
                                         date_format)
                worksheet.write_datetime(row, col + 7, due_date, date_format)
            else:
                last_payment = None
                due_date = None
                worksheet.write(row, col + 6, last_payment)
                worksheet.write(row, col + 7, due_date)

            row += 1

        workbook.close()

        time.sleep(5)
        # i += 1
        task = Task.query.get(job.get_id())
        task.complete = True
        db.session.commit()

        download = Download.query.filter_by(task_id=job.get_id()).first()
        if download:
            name = 'Data of ' + str(total_clients) + ' clients in XLS'
            download.name = name
            download.path = file_name
            download.generated = datetime.now()
            download.status = 1
            db.session.commit()

    except Exception:
        app.logger.error('Unhandled exception', exc_info=sys.exc_info())
Example #41
0
def make_schedule(
    asset_id: int,
    start: datetime,
    end: datetime,
    belief_time: datetime,
    resolution: timedelta,
    soc_at_start: Optional[float] = None,
    soc_targets: Optional[pd.Series] = None,
) -> bool:
    """Preferably, a starting soc is given.
    Otherwise, we try to retrieve the current state of charge from the asset (if that is the valid one at the start).
    Otherwise, we set the starting soc to 0 (some assets don't use the concept of a state of charge,
    and without soc targets and limits the starting soc doesn't matter).
    """
    # https://docs.sqlalchemy.org/en/13/faq/connections.html#how-do-i-use-engines-connections-sessions-with-python-multiprocessing-or-os-fork
    db.engine.dispose()

    rq_job = get_current_job()

    # find asset
    asset = Asset.query.filter_by(id=asset_id).one_or_none()

    click.echo(
        "Running Scheduling Job %s: %s, from %s to %s" % (rq_job.id, asset, start, end)
    )

    if soc_at_start is None:
        if start == asset.soc_datetime and asset.soc_in_mwh is not None:
            soc_at_start = asset.soc_in_mwh
        else:
            soc_at_start = 0

    if soc_targets is None:
        soc_targets = pd.Series(
            np.nan, index=pd.date_range(start, end, freq=resolution, closed="right")
        )

    if asset.asset_type_name == "battery":
        consumption_schedule = schedule_battery(
            asset, asset.market, start, end, resolution, soc_at_start, soc_targets
        )
    elif asset.asset_type_name in (
        "one-way_evse",
        "two-way_evse",
    ):
        consumption_schedule = schedule_charging_station(
            asset, asset.market, start, end, resolution, soc_at_start, soc_targets
        )
    else:
        raise ValueError(
            "Scheduling is not supported for asset type %s." % asset.asset_type
        )

    data_source = get_data_source(
        data_source_name="Seita",
        data_source_type="scheduling script",
    )
    click.echo("Job %s made schedule." % rq_job.id)

    ts_value_schedule = [
        Power(
            datetime=dt,
            horizon=dt.astimezone(pytz.utc) - belief_time.astimezone(pytz.utc),
            value=-value,
            asset_id=asset_id,
            data_source_id=data_source.id,
        )
        for dt, value in consumption_schedule.items()
    ]  # For consumption schedules, positive values denote consumption. For the db, consumption is negative

    try:
        save_to_session(ts_value_schedule)
    except IntegrityError as e:

        current_app.logger.warning(e)
        click.echo("Rolling back due to IntegrityError")
        db.session.rollback()

        if current_app.config.get("FLEXMEASURES_MODE", "") == "play":
            click.echo("Saving again, with overwrite=True")
            save_to_session(ts_value_schedule, overwrite=True)

    db.session.commit()

    return True
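
A hedged sketch of how such a scheduling job might be enqueued; the queue name and import path are assumptions, and the keyword arguments simply mirror the signature above:

from datetime import datetime, timedelta
from redis import Redis
from rq import Queue
from scheduling_jobs import make_schedule  # hypothetical module path

q = Queue('scheduling', connection=Redis(), default_timeout=3600)
job = q.enqueue_call(
    func=make_schedule,
    kwargs=dict(
        asset_id=1,
        start=datetime(2021, 1, 1, 0, 0),
        end=datetime(2021, 1, 2, 0, 0),
        belief_time=datetime(2021, 1, 1, 0, 0),
        resolution=timedelta(minutes=15),
    ),
    result_ttl=3600,
)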
Example #42
0
def _set_task_results(md5):
    job = get_current_job()
    if job:
        task = Task.query.get(job.get_id())
        task.md5 = md5
        db.session.commit()
Example #43
0
def run_task_predictions(ml_backend_id, batch_size=100):
    """
    Run prediction and update db, stats counts and project prerequisites
    :param ml_backend_id: ID of the MLBackend whose project tasks should get predictions
    :param batch_size: maximum number of tasks to predict per run
    :return:
    """
    ml_backend = MLBackend.objects.get(id=ml_backend_id)
    response = ml_backend.setup()
    if response.is_error:
        raise ValueError(response.error_message)
    else:
        if response.response['model_version'] != ml_backend.model_version:
            ml_backend.model_version = response.response['model_version']
            ml_backend.save()

    # collect tasks without predictions for current model version
    tasks_without_predictions = ml_backend.project.tasks.annotate(
        model_version=F('predictions__model_version'),
        num_predictions=Count('predictions')).filter(
            ~Q(model_version=ml_backend.model_version) | Q(num_predictions=0))

    if not tasks_without_predictions.exists():
        logger.info(
            f'Predictions for project {ml_backend.project} with version {ml_backend.model_version} already exist, '
            f'update is not needed')
        return {'status': 'ok'}
    else:
        logger.info(
            f'Found {tasks_without_predictions.count()} tasks without predictions '
            f'from model version {ml_backend.model_version} in project {ml_backend.project}'
        )

    # TODO: randomize tasks selection so that taken tasks don't clash with each other with high probability
    tasks = TaskSerializer(tasks_without_predictions[:batch_size],
                           many=True).data

    failed_tasks = []
    for task in tasks:
        task_id = task['id']
        ml_api_result = ml_backend.api.make_predictions(
            [task], ml_backend.model_version, ml_backend.project)
        if not _validate_ml_api_result(ml_api_result, [task], logger):
            logger.warning(
                f'Project {ml_backend.project}: task {task_id} failed')
            failed_tasks.append(task)
            continue

        prediction_result = ml_api_result.response['results'][0]

        with transaction.atomic():
            Prediction.objects.filter(
                task_id=task_id,
                model_version=ml_backend.model_version).delete()
            Prediction.objects.create(
                task_id=task_id,
                model_version=ml_backend.model_version,
                result=prediction_result['result'],
                score=safe_float(prediction_result.get('score', 0)),
                cluster=prediction_result.get('cluster'),
                neighbors=prediction_result.get('neighbors'),
                mislabeling=safe_float(prediction_result.get('mislabeling',
                                                             0)))
        logger.info(
            f'Project {ml_backend.project}: task {task_id} processed with model version {ml_backend.model_version}'
        )

    MLBackendPredictionJob.objects.filter(job_id=get_current_job().id).delete()
    logger.info(
        f'Total task processes: {len(tasks)}, failed: {len(failed_tasks)}')
    return {
        'status': 'ok',
        'processed_num': len(tasks),
        'failed': failed_tasks
    }
Example #44
0
def worker_function(event_type,
                    assignment_id,
                    participant_id,
                    node_id=None,
                    details=None):
    """Process the notification."""
    _config()
    q = _get_queue()
    try:
        db.logger.debug("rq: worker_function working on job id: %s",
                        get_current_job().id)
        db.logger.debug("rq: Received Queue Length: %d (%s)", len(q),
                        ", ".join(q.job_ids))
    except AttributeError:
        db.logger.debug("Debug worker_function called synchronously")

    exp = _loaded_experiment(db.session)
    key = "-----"

    exp.log(
        "Received an {} notification for assignment {}, participant {}".format(
            event_type, assignment_id, participant_id),
        key,
    )

    if event_type == "TrackingEvent":
        node = None
        if node_id:
            node = models.Node.query.get(node_id)
        if not node:
            participant = None
            if participant_id:
                # Lookup assignment_id to create notifications
                participant = models.Participant.query.get(participant_id)
            elif assignment_id:
                participants = models.Participant.query.filter_by(
                    assignment_id=assignment_id).all()
                # if there are one or more participants select the most recent
                if participants:
                    participant = max(participants,
                                      key=attrgetter("creation_time"))
                    participant_id = participant.id
            if not participant:
                exp.log(
                    "Warning: No participant associated with this "
                    "TrackingEvent notification.",
                    key,
                )
                return
            nodes = participant.nodes()
            if not nodes:
                exp.log(
                    "Warning: No node associated with this "
                    "TrackingEvent notification.",
                    key,
                )
                return
            node = max(nodes, key=attrgetter("creation_time"))

        if not details:
            details = {}
        info = information.TrackingEvent(origin=node, details=details)
        db.session.add(info)
        db.session.commit()
        return

    runner_cls = WorkerEvent.for_name(event_type)
    if not runner_cls:
        exp.log(
            "Event type {} is not supported... ignoring.".format(event_type))
        return

    if assignment_id is not None:
        # save the notification to the notification table
        notif = models.Notification(assignment_id=assignment_id,
                                    event_type=event_type)
        db.session.add(notif)
        db.session.commit()

        # try to identify the participant
        participants = models.Participant.query.filter_by(
            assignment_id=assignment_id).all()

        # if there are one or more participants select the most recent
        if participants:
            participant = max(participants, key=attrgetter("creation_time"))

        # if there are none, log a warning and stop processing
        else:
            exp.log(
                "Warning: No participants associated with this "
                "assignment_id. Notification will not be processed.",
                key,
            )
            return None

    elif participant_id is not None:
        participant = models.Participant.query.filter_by(
            id=participant_id).all()[0]
    else:
        raise ValueError(
            "Error: worker_function needs either an assignment_id or a "
            "participant_id, they cannot both be None")

    participant_id = participant.id

    runner = runner_cls(participant, assignment_id, exp, db.session, _config(),
                        datetime.now())
    runner()
    db.session.commit()
Example #45
0
def set_task_complete():
    job = get_current_job()
    if job:
        task = Task.query.get(job.get_id())
        task.complete = True
        db.session.commit()
Example #46
0
def build(req: dict):
    """Build image request and setup ImageBuilders automatically

    The `request` dict contains properties of the requested image.

    Args:
        request (dict): Contains all properties of requested image
    """

    if not req["store_path"].is_dir():
        raise StorePathMissingError()

    job = get_current_job()

    log.debug(f"Building {req}")
    cache = (Path.cwd() / "cache" / req["version"] / req["target"]).parent
    target, subtarget = req["target"].split("/")
    sums_file = Path(cache / f"{subtarget}_sums")
    sig_file = Path(cache / f"{subtarget}_sums.sig")

    def setup_ib():
        """Setup ImageBuilder based on `req`

        This function downloads and verifies the ImageBuilder archive. Existing
        setups are automatically updated if a newer version is available
        upstream.
        """
        log.debug("Setting up ImageBuilder")
        if (cache / subtarget).is_dir():
            rmtree(cache / subtarget)

        download_file("sha256sums.sig", sig_file)
        download_file("sha256sums", sums_file)

        if not verify_usign(sig_file, sums_file, req["branch_data"]["pubkey"]):
            raise BadSignatureError()

        ib_search = re.search(
            r"^(.{64}) \*(openwrt-imagebuilder-.+?\.Linux-x86_64\.tar\.xz)$",
            sums_file.read_text(),
            re.MULTILINE,
        )

        if not ib_search:
            raise ChecksumMissingError()

        ib_hash, ib_archive = ib_search.groups()

        download_file(ib_archive)

        if ib_hash != get_file_hash(cache / ib_archive):
            raise BadChecksumError()

        (cache / subtarget).mkdir(parents=True, exist_ok=True)
        extract_archive = subprocess.run(
            ["tar", "--strip-components=1", "-xf", ib_archive, "-C", subtarget],
            cwd=cache,
        )

        if extract_archive.returncode:
            raise ExtractArchiveError()

        log.debug(f"Extracted TAR {ib_archive}")

        (cache / ib_archive).unlink()

        for key in req["branch_data"].get("extra_keys", []):
            fingerprint = fingerprint_pubkey_usign(key)
            (cache / subtarget / "keys" / fingerprint).write_text(
                f"untrusted comment: ASU extra key {fingerprint}\n{key}"
            )

        repos_path = cache / subtarget / "repositories.conf"
        repos = repos_path.read_text()

        # speed up downloads with HTTP and CDN
        repos = repos.replace("https://downloads.openwrt.org", req["upstream_url"])
        repos = repos.replace("http://downloads.openwrt.org", req["upstream_url"])
        repos = repos.replace("https", "http")

        extra_repos = req["branch_data"].get("extra_repos")
        if extra_repos:
            log.debug("Found extra repos")
            for name, repo in extra_repos.items():
                repos += f"\nsrc/gz {name} {repo}"

        repos_path.write_text(repos)
        log.debug(f"Repos:\n{repos}")

        if (Path.cwd() / "seckey").exists():
            # link key-build to imagebuilder
            (cache / subtarget / "key-build").symlink_to(Path.cwd() / "seckey")
        if (Path.cwd() / "pubkey").exists():
            # link key-build.pub to imagebuilder
            (cache / subtarget / "key-build.pub").symlink_to(Path.cwd() / "pubkey")
        if (Path.cwd() / "newcert").exists():
            # link key-build.ucert to imagebuilder
            (cache / subtarget / "key-build.ucert").symlink_to(Path.cwd() / "newcert")

    def download_file(filename: str, dest: str = None):
        """Download file from upstream target path

        The URL points automatically to the targets folder upstream

        Args:
            filename (str): File in upstream target folder
            dest (str): Optional path to store the file; defaults to the
                        target cache folder
        """
        log.debug(f"Downloading {filename}")
        urllib.request.urlretrieve(
            req["upstream_url"]
            + "/"
            + req["branch_data"]["path"].format(version=req["version"])
            + "/targets/"
            + req["target"]
            + "/"
            + filename,
            dest or (cache / filename),
        )

    cache.mkdir(parents=True, exist_ok=True)

    stamp_file = cache / f"{subtarget}_stamp"

    sig_file_headers = urllib.request.urlopen(
        req["upstream_url"]
        + "/"
        + req["branch_data"]["path"].format(version=req["version"])
        + "/targets/"
        + req["target"]
        + "/sha256sums.sig"
    ).info()
    log.debug(f"sig_file_headers: \n{sig_file_headers}")

    origin_modified = sig_file_headers.get("Last-Modified")
    log.info("Origin %s", origin_modified)

    if stamp_file.is_file():
        local_modified = stamp_file.read_text()
        log.info("Local  %s", local_modified)
    else:
        local_modified = ""

    if origin_modified != local_modified:
        log.debug("New ImageBuilder upstream available")
        setup_ib()

    stamp_file.write_text(origin_modified)

    info_run = subprocess.run(
        ["make", "info"], text=True, capture_output=True, cwd=cache / subtarget
    )

    version_code = re.search('Current Revision: "(r.+)"', info_run.stdout).group(1)

    if "version_code" in req:
        if version_code != req.get("version_code"):
            raise ImageBuilderVersionError(
                f"requested {req['version_code']} vs got {version_code}"
            )

    if req.get("diff_packages", False):
        default_packages = set(
            re.search(r"Default Packages: (.*)\n", info_run.stdout).group(1).split()
        )
        profile_packages = set(
            re.search(
                r"{}:\n    .+\n    Packages: (.*?)\n".format(req["profile"]),
                info_run.stdout,
                re.MULTILINE,
            )
            .group(1)
            .split()
        )
        remove_packages = (default_packages | profile_packages) - req["packages"]
        req["packages"] = req["packages"] | set(map(lambda p: f"-{p}", remove_packages))

    manifest_run = subprocess.run(
        [
            "make",
            "manifest",
            f"PROFILE={req['profile']}",
            f"PACKAGES={' '.join(req.get('packages', ''))}",
            "STRIP_ABI=1",
        ],
        text=True,
        cwd=cache / subtarget,
        capture_output=True,
    )

    if manifest_run.returncode:
        if "Package size mismatch" in manifest_run.stderr:
            rmtree(cache / subtarget)
            return build(req)
        else:
            job.meta["stdout"] = manifest_run.stdout
            job.meta["stderr"] = manifest_run.stderr
            job.save_meta()
            raise PackageSelectionError()

    manifest = dict(map(lambda pv: pv.split(" - "), manifest_run.stdout.splitlines()))

    for package, version in req.get("packages_versions", {}).items():
        if package not in manifest:
            raise PackageSelectionError(f"{package} not in manifest")
        if version != manifest[package]:
            raise PackageSelectionError(
                f"{package} version not as requested: {version} vs. {manifest[package]}"
            )

    manifest_packages = manifest.keys()

    log.debug(f"Manifest Packages: {manifest_packages}")

    packages_hash = get_packages_hash(manifest_packages)
    log.debug(f"Packages Hash {packages_hash}")

    bin_dir = Path(req["version"]) / req["target"] / req["profile"] / packages_hash

    (req["store_path"] / bin_dir).mkdir(parents=True, exist_ok=True)

    image_build = subprocess.run(
        [
            "make",
            "image",
            f"PROFILE={req['profile']}",
            f"PACKAGES={' '.join(req['packages'])}",
            f"EXTRA_IMAGE_NAME={packages_hash}",
            f"BIN_DIR={req['store_path'] / bin_dir}",
        ],
        text=True,
        cwd=cache / subtarget,
        capture_output=True,
    )

    # check if running as job or within pytest
    if job:
        job.meta["stdout"] = image_build.stdout
        job.meta["stderr"] = image_build.stderr
        job.meta["bin_dir"] = str(bin_dir)
        job.save_meta()

    if image_build.returncode:
        raise ImageBuildError()

    json_file = Path(req["store_path"] / bin_dir / "profiles.json")

    if not json_file.is_file():
        raise JSONMissingError()

    json_content = json.loads(json_file.read_text())

    if req["profile"] not in json_content["profiles"]:
        raise JSONMissingProfileError()

    json_content.update({"manifest": manifest})
    json_content.update(json_content["profiles"][req["profile"]])
    json_content["id"] = req["profile"]
    json_content.pop("profiles")

    return json_content
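
A hedged sketch of the caller side: enqueue a request dict shaped like the one build() expects and, once the job has finished, read the bin_dir recorded in job.meta. Every concrete value below (paths, version, target, profile, packages) is made up for illustration:

from pathlib import Path
from redis import Redis
from rq import Queue
from rq.job import Job
from builder import build  # hypothetical module path

req = {
    "store_path": Path("/tmp/asu-store"),  # must exist and be a directory
    "upstream_url": "https://downloads.openwrt.org",
    "version": "21.02.0",
    "target": "ath79/generic",
    "profile": "tplink_archer-c7-v2",
    "packages": {"luci"},
    "branch_data": {"path": "releases/{version}", "pubkey": "<usign public key>"},
}

redis_conn = Redis()
q = Queue("builder", connection=redis_conn)
job = q.enqueue_call(func=build, args=(req,), timeout=600)

# later, e.g. in a status endpoint:
job = Job.fetch(job.id, connection=redis_conn)
if job.is_finished:
    print("images stored under", job.meta.get("bin_dir"))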
Example #47
0
def _dextr_thread(db_data, frame, points):
    job = rq.get_current_job()
    job.meta["result"] = __DEXTR_HANDLER.handle(db_data, frame, points)
    job.save_meta()
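
The pattern above stores the handler's result in job.meta rather than in the job's return value, so a caller picks it up from meta. A small sketch, assuming only the job id and a Redis connection:

from redis import Redis
from rq.job import Job

def fetch_dextr_result(job_id):
    """Return whatever the worker stored under job.meta['result'], or None."""
    job = Job.fetch(job_id, connection=Redis())
    job.refresh()  # meta is persisted by the worker via job.save_meta()
    return job.meta.get("result")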
Example #48
0
def modify_self(meta):
    j = get_current_job()
    j.meta.update(meta)
    j.save()
Example #49
0
 def add_job_meta(self):
     job = get_current_job()
     job.meta["conn"] = self  # for identification during timeout handling
     job.save_meta()
Example #50
0
def access_self():
    assert get_current_connection() is not None
    assert get_current_job() is not None
Example #51
0
def refresh_background(course_id):
    """
    Look up existing extensions and apply them to new quizzes.

    :param course_id: The Canvas ID of the Course.
    :type course_id: int
    :rtype: dict
    :returns: A dictionary containing two parts:

        - success `bool` False if there was an error, True otherwise.
        - message `str` A long description of success or failure.
    """
    job = get_current_job()

    update_job(job, 0, "Starting...", "started")

    with app.app_context():
        course, created = get_or_create(db.session,
                                        Course,
                                        canvas_id=course_id)

        try:
            course_name = get_course(course_id).get("name", "<UNNAMED COURSE>")
            course.course_name = course_name
            db.session.commit()
        except requests.exceptions.HTTPError:
            update_job(job, 0, "Course not found.", "failed", error=True)
            logger.exception("Unable to find course #{}".format(course_id))

            return job.meta

        quizzes = missing_and_stale_quizzes(course_id)

        num_quizzes = len(quizzes)

        if num_quizzes < 1:
            update_job(
                job,
                100,
                "Complete. No quizzes required updates.",
                "complete",
                error=False,
            )

            return job.meta

        percent_user_map = defaultdict(list)

        inactive_list = []

        update_job(job, 0, "Getting past extensions.", "processing", False)
        for extension in course.extensions:
            # If extension is inactive, ignore.
            if not extension.active:
                inactive_list.append(extension.user.sortable_name)
                logger.debug("Extension #{} is inactive.".format(extension.id))
                continue

            user_canvas_id = (User.query.filter_by(
                id=extension.user_id).first().canvas_id)

            # Check if user is in course. If not, deactivate extension.
            try:
                canvas_user = get_user(course_id, user_canvas_id)

                # Skip user if not a student. Fixes an edge case where a
            # student that previously received an extension changes roles.
                enrolls = canvas_user.get("enrollments", [])
                type_list = [
                    e["type"] for e in enrolls
                    if e["enrollment_state"] in ("active", "invited")
                ]
                if not any(t == "StudentEnrollment" for t in type_list):
                    logger.info(
                        ("User #{} was found in course #{}, but is not an "
                         "active student. Deactivating extension #{}. Roles "
                         "found: {}").format(
                             user_canvas_id,
                             course_id,
                             extension.id,
                             ", ".join(type_list)
                             if len(enrolls) > 0 else None,
                         ))
                    extension.active = False
                    db.session.commit()
                    inactive_list.append(extension.user.sortable_name)
                    continue

            except requests.exceptions.HTTPError:
                log_str = "User #{} not in course #{}. Deactivating extension #{}."
                logger.info(
                    log_str.format(user_canvas_id, course_id, extension.id))
                extension.active = False
                db.session.commit()
                inactive_list.append(extension.user.sortable_name)
                continue

            percent_user_map[extension.percent].append(user_canvas_id)

        if len(percent_user_map) < 1:
            msg_str = "No active extensions were found.<br>"

            if len(inactive_list) > 0:
                msg_str += " Extensions for the following students are inactive:<br>{}"
                msg_str = msg_str.format("<br>".join(inactive_list))

            update_job(job, 100, msg_str, "complete", error=False)
            return job.meta

        for index, quiz in enumerate(quizzes):
            quiz_id = quiz.get("id", None)
            quiz_title = quiz.get("title", "[UNTITLED QUIZ]")

            comp_perc = int(((float(index)) / float(num_quizzes)) * 100)
            refreshing_str = "Refreshing quiz #{} - {} [{} of {}]"
            update_job(
                job,
                comp_perc,
                refreshing_str.format(quiz_id, quiz_title, index + 1,
                                      num_quizzes),
                "processing",
                error=False,
            )

            for percent, user_list in percent_user_map.items():
                extension_response = extend_quiz(course_id, quiz, percent,
                                                 user_list)

                if extension_response.get("success", False) is True:
                    # add/update quiz
                    quiz_obj, created = get_or_create(db.session,
                                                      Quiz,
                                                      canvas_id=quiz_id,
                                                      course_id=course.id)
                    quiz_obj.title = quiz_title
                    quiz_obj.time_limit = quiz.get("time_limit")

                    db.session.commit()
                else:
                    error_message = "Some quizzes couldn't be updated. "
                    error_message += extension_response.get("message", "")
                    update_job(job,
                               comp_perc,
                               error_message,
                               "failed",
                               error=True)
                    return job.meta

        msg = "{} quizzes have been updated.".format(len(quizzes))
        update_job(job, 100, msg, "complete", error=False)
        return job.meta
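
The update_job helper used throughout refresh_background is not shown in this snippet; a minimal sketch of what it might look like, assuming it only records progress fields on the RQ job's meta:

def update_job(job, percent, status_msg, state, error=False):
    """Hypothetical helper: persist progress information on the current RQ job."""
    job.meta['percent'] = percent
    job.meta['status_msg'] = status_msg
    job.meta['state'] = state
    job.meta['error'] = error
    job.save_meta()
    return job.meta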
Example #52
0
def get_job_object():

    return get_current_job()
Example #53
0
def invoke_iap_export(timestamp_id, output_path, username, shared_folder_map, task_key, analysis_iap_id=None):
    """
    This method represents an RQ job workload. It should be enqueued into the RQ analysis queue and processed by a corresponding worker.

    Handles the invocation of data export of an IAP analysis on the IAP server and fetches the resulting information afterwards.
    The received information is then entered into the database accordingly.

    The analysis_iap_id has to be either passed directly or stored in the result of a job that this one
    depends on, under the key 'response.result_id'.

    :param timestamp_id: The ID of the :class:`~server.models.timestamp_model.TimestampModel` instance to which the data belongs
    :param output_path: The path, as SMB URL, where the data should be exported to
    :param username: The username of the user invoking this job
    :param task_key: The ID of the :class:`~server.modules.processing.analysis.analysis_task.AnalysisTask` to which this job belongs
    :param shared_folder_map: A dict containing a mapping between SMB URLs and local paths representing the corresponding mount points
    :param analysis_iap_id: The IAP ID of the analysis on the IAP server

    :return: a dict containing the 'analysis_id' for which the data has been exported
        and the 'path' to which the results have been exported. (All nested inside the 'response' key)
    """
    print('EXECUTE EXPORT')
    job = get_current_job()
    log_store = get_log_store()
    task = AnalysisTask.from_key(get_redis_connection(), task_key)
    channel = get_grpc_channel()
    iap_stub = phenopipe_iap_pb2_grpc.PhenopipeIapStub(channel)
    pipe_stub = phenopipe_pb2_grpc.PhenopipeStub(channel)

    if analysis_iap_id is None:
        analysis_iap_id = job.dependency.result['response']['result_id']

    log_store.put(job.id, 'Started Export Job', 0)
    task.update_message('Started Export Job')
    try:
        response = iap_stub.ExportExperiment(
            phenopipe_iap_pb2.ExportRequest(experiment_id=analysis_iap_id, destination_path=output_path)
        )
        remote_job_id = response.job_id
        request = phenopipe_pb2.WatchJobRequest(
            job_id=remote_job_id
        )
        status = pipe_stub.WatchJob(request)
        for msg in status:
            print(msg.message.decode('string-escape'))
            log_store.put(job.id, msg.message.decode('string-escape'), msg.progress)

        response = iap_stub.FetchExportResult(
            phenopipe_pb2.FetchJobResultRequest(job_id=remote_job_id)
        )
        session = get_session()
        analysis = session.query(AnalysisModel) \
            .filter(AnalysisModel.timestamp_id == timestamp_id) \
            .filter(AnalysisModel.iap_id == analysis_iap_id) \
            .one()

        log_store.put(job.id, 'Received Results. Started to parse and add information', 90)
        task.update_message('Received Results. Started to parse and add information')
        image_path = get_local_path_from_smb(response.image_path, shared_folder_map)
        # TODO handle DB errors
        for image_name in os.listdir(image_path):
            # Extract information from filename
            snapshot_id, _, new_filename = image_name.partition('_')
            _, _, angle = os.path.splitext(image_name)[0].rpartition('_')

            img = ImageModel(snapshot_id, response.image_path, new_filename, angle, 'segmented')
            session.add(img)
            # rename the file and remove the snapshot id
            os.rename(os.path.join(image_path, image_name), os.path.join(image_path, new_filename))
        analysis.export_path = response.path
        exported_at = datetime.utcnow()
        analysis.exported_at = exported_at
        session.commit()
        log_store.put(job.id, 'Finished Export Job', 100)
        task.update_message('Finished Export Job')
        return create_return_object(JobType.iap_export, timestamp_id,
                                    {'analysis_id': analysis.id, 'path': response.path, 'exported_at': exported_at})
    except grpc.RpcError as e:
        log_store.put(job.id, e.details(), 0)
        task.update_message('Export Job Failed')
        raise
Example #54
0
def xloader_data_into_datastore_(input, job_dict):
    '''This function:
    * downloads the resource (metadata) from CKAN
    * downloads the data
    * calls the loader to load the data into DataStore
    * calls back to CKAN with the new status

    (datapusher called this function 'push_to_datastore')
    '''
    job_id = get_current_job().id
    db.init(config)

    # Store details of the job in the db
    try:
        db.add_pending_job(job_id, **input)
    except sa.exc.IntegrityError:
        raise JobError('job_id {} already exists'.format(job_id))

    # Set-up logging to the db
    handler = StoringHandler(job_id, input)
    level = logging.DEBUG
    handler.setLevel(level)
    logger = logging.getLogger(job_id)
    handler.setFormatter(logging.Formatter('%(message)s'))
    logger.addHandler(handler)
    # also show logs on stderr
    logger.addHandler(logging.StreamHandler())
    logger.setLevel(logging.DEBUG)

    validate_input(input)

    data = input['metadata']

    ckan_url = data['ckan_url']
    resource_id = data['resource_id']
    api_key = input.get('api_key')

    try:
        resource, dataset = get_resource_and_dataset(resource_id)
    except (JobError, ObjectNotFound) as e:
        # try again in 5 seconds just in case CKAN is slow at adding resource
        time.sleep(5)
        resource, dataset = get_resource_and_dataset(resource_id)
    resource_ckan_url = '/dataset/{}/resource/{}' \
        .format(dataset['name'], resource['id'])
    logger.info('Express Load starting: {}'.format(resource_ckan_url))

    # check if the resource url_type is a datastore
    if resource.get('url_type') == 'datastore':
        logger.info('Ignoring resource - url_type=datastore - dump files are '
                    'managed with the Datastore API')
        return

    # download resource
    tmp_file, file_hash = _download_resource_data(resource, data, api_key,
                                                  logger)

    # hash isn't actually stored, so this is a bit worthless at the moment
    if (resource.get('hash') == file_hash
            and not data.get('ignore_hash')):
        logger.info('Ignoring resource - the file hash hasn\'t changed: '
                    '{hash}.'.format(hash=file_hash))
        return
    logger.info('File hash: {}'.format(file_hash))
    resource['hash'] = file_hash  # TODO write this back to the actual resource

    def direct_load():
        fields = loader.load_csv(
            tmp_file.name,
            resource_id=resource['id'],
            mimetype=resource.get('format'),
            logger=logger)
        loader.calculate_record_count(
            resource_id=resource['id'], logger=logger)
        set_datastore_active(data, resource, api_key, ckan_url, logger)
        job_dict['status'] = 'running_but_viewable'
        callback_xloader_hook(result_url=input['result_url'],
                              api_key=api_key,
                              job_dict=job_dict)
        logger.info('Data now available to users: {}'.format(resource_ckan_url))
        loader.create_column_indexes(
            fields=fields,
            resource_id=resource['id'],
            logger=logger)

    def messytables_load():
        try:
            loader.load_table(tmp_file.name,
                              resource_id=resource['id'],
                              mimetype=resource.get('format'),
                              logger=logger)
        except JobError as e:
            logger.error('Error during messytables load: {}'.format(e))
            raise
        loader.calculate_record_count(
            resource_id=resource['id'], logger=logger)
        set_datastore_active(data, resource, api_key, ckan_url, logger)
        logger.info('Finished loading with messytables')

    # Load it
    logger.info('Loading CSV')
    just_load_with_messytables = asbool(config.get(
        'ckanext.xloader.just_load_with_messytables', False))
    logger.info("'Just load with messytables' mode is: {}".format(
        just_load_with_messytables))
    try:
        if just_load_with_messytables:
            messytables_load()
        else:
            try:
                direct_load()
            except JobError as e:
                logger.warning('Load using COPY failed: {}'.format(e))
                logger.info('Trying again with messytables')
                messytables_load()
    except FileCouldNotBeLoadedError as e:
        logger.warning('Loading excerpt for this format not supported.')
        logger.error('Loading file raised an error: {}'.format(e))
        raise JobError('Loading file raised an error: {}'.format(e))

    tmp_file.close()

    logger.info('Express Load completed')
Example #55
0
def invoke_iap_import(timestamp_id, experiment_name, coordinator, scientist, local_path, path, username,
                      task_key):
    """
    This method represents an RQ job workload. It should be enqueued into the RQ analysis queue and processed by a corresponding worker.

    Handles the invocation of the data import into IAP on the IAP server and fetches the result information afterwards.
    The received information is then entered into the database accordingly.

    :param timestamp_id: The ID of the :class:`~server.models.timestamp_model.TimestampModel` instance which should be imported
    :param experiment_name: The name of the experiment to import
    :param coordinator: The name of the experiment coordinator
    :param scientist: The name of the scientist carrying out the experiment
    :param local_path: The path to the data on the local system
    :param path: The SMB url representing the location of the data
    :param username: The username of the user invoking this job
    :param task_key: The redis key of the :class:`~server.modules.analysis.analysis_task.AnalysisTask` to which this job belongs

    :return: A dict containing the 'experiment_id' (nested in the 'response' key) returned by IAP
    """
    print('EXECUTE IMPORT')
    job = get_current_job()
    log_store = get_log_store()
    task = AnalysisTask.from_key(get_redis_connection(), task_key)
    channel = get_grpc_channel()
    iap_stub = phenopipe_iap_pb2_grpc.PhenopipeIapStub(channel)
    pipe_stub = phenopipe_pb2_grpc.PhenopipeStub(channel)
    log_store.put(job.id, 'Started Import Job', 0)
    task.update_message('Started Import Job')
    log_store.put(job.id, 'Create Metadata File')
    task.update_message('Create Metadata File')
    create_iap_import_sheet(timestamp_id, local_path)
    log_store.put(job.id, 'Metadata File Created')
    task.update_message('Metadata File Created')
    try:
        log_store.put(job.id, 'Import data into IAP')
        task.update_message('Import data into IAP')
        response = iap_stub.ImportExperiment(
            phenopipe_iap_pb2.ImportRequest(path=path, experiment_name=experiment_name,
                                            coordinator_name=coordinator,
                                            user_name=scientist)
        )

        remote_job_id = response.job_id
        request = phenopipe_pb2.WatchJobRequest(
            job_id=remote_job_id
        )
        status = pipe_stub.WatchJob(request)

        for msg in status:
            log_store.put(job.id, msg.message.decode('string-escape'), msg.progress)

        response = iap_stub.FetchImportResult(
            phenopipe_pb2.FetchJobResultRequest(job_id=remote_job_id)
        )
        session = get_session()
        timestamp = session.query(TimestampModel).get(timestamp_id)
        timestamp.iap_exp_id = response.experiment_id
        session.commit()
        log_store.put(job.id, 'Finished Import Job', 100)
        task.update_message('Finished Import Job')
        return create_return_object(JobType.iap_import, timestamp_id, {'experiment_iap_id': response.experiment_id})
    except grpc.RpcError as e:
        if e.code() == grpc.StatusCode.ALREADY_EXISTS:
            session = get_session()
            timestamp = session.query(TimestampModel).get(timestamp_id)
            timestamp.iap_exp_id = e.initial_metadata()[0][1]
            session.commit()
            return create_return_object(JobType.iap_import, timestamp_id, {'experiment_iap_id': timestamp.iap_exp_id})
        else:
            task.update_message('Import Job Failed')
            log_store.put(job.id, e.details(), 0)
            raise
Example #56
0
def task_make_network(form_data):
    job = get_current_job()
    job.meta['progress'] = 'started'
    job.save_meta()

    network = Network(include_experimental=bool(
        form_data['include_experimental']),
                      include_two_step=bool(form_data['include_two_step']),
                      include_requires_absence_of_water=bool(
                          form_data['include_requires_absence_of_water']),
                      print_log=not current_app.config['PRODUCTION'])

    network.update_settings({
        "allow_backwards_steps":
        bool(form_data['allow_backwards']),
        "remove_simple":
        bool(form_data['remove_small']),
        "similarity_score_threshold":
        float(form_data['sub_thres']),
        "combine_enantiomers":
        bool(form_data['combine_enantiomers']),
        "num_enzymes":
        1,
        "calculate_complexities":
        bool(form_data['calc_complexity']),
        "calculate_substrate_specificity":
        bool(form_data['sub_sim']),
        "max_nodes":
        int(form_data['max_initial_nodes'], ),
        "colour_reactions":
        form_data['colour_reactions'],
        "colour_arrows":
        form_data['colour_edges'],
        "show_negative_enzymes":
        form_data['show_neg_enz'],
        "only_postitive_enzyme_data":
        not form_data['show_neg_enz'],
        "max_reactions":
        form_data["max_reactions"],
        'only_reviewed_activity_data':
        bool(form_data["only_reviewed"])
    })

    if form_data[
            "specificity_scoring_mode"] == 'Product + substrates (slower)':
        network.update_settings({'specificity_score_substrates': True})

    #print(f"include_experimental = {network.settings['include_experimental']}")
    #print(f"include_two_step = {network.settings['include_two_step']}")

    network.generate(form_data['target_smiles'],
                     form_data['number_steps'],
                     calculate_scores=False)

    job.meta['progress'] = 'network_generated'
    job.save_meta()

    network.calculate_scores()

    job.meta['progress'] = 'scores_calculated'
    job.save_meta()

    nodes, edges = network.get_visjs_nodes_and_edges()

    #options = {'interaction': {'multiselect': 'true',}}
    options = {}
    default_network_name = 'Network for ' + str(network.target_smiles)

    result = {
        'save_id': str(uuid.uuid4()),
        'save_links': [],
        'save_name': default_network_name,
        'nodes': nodes,
        'edges': edges,
        'options': json.dumps(options),
        'graph_dict': json.dumps(nx.to_dict_of_lists(network.graph)),
        'target_smiles': str(network.target_smiles),
        'network_options': json.dumps(network.settings),
        'attr_dict': json.dumps(network.attributes_dict()),
        'max_reactions': int(network.settings['max_reactions'])
    }

    current_app.redis.mset({job.id: json.dumps(result)})
    time_to_expire = 15 * 60  #15 mins * 60 seconds
    current_app.redis.expire(job.id, time_to_expire)

    return result
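
Since the result dict is also cached in Redis under the job id with a 15 minute TTL, a view can serve it without touching the RQ result backend. A hedged sketch, assuming current_app.redis is the same client the task wrote to:

import json
from flask import current_app

def load_network_result(job_id):
    """Return the cached network result for a finished job, or None if it expired."""
    raw = current_app.redis.get(job_id)
    return json.loads(raw) if raw else None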
Example #57
0
def _create_thread(tid, data):
    slogger.glob.info("create task #{}".format(tid))

    db_task = models.Task.objects.select_for_update().get(pk=tid)
    db_data = db_task.data
    if db_task.data.size != 0:
        raise NotImplementedError("Adding more data is not implemented")

    upload_dir = db_data.get_upload_dirname()

    if data['remote_files']:
        data['remote_files'] = _download_data(data['remote_files'], upload_dir)

    manifest_file = []
    media = _count_files(data, manifest_file)
    media, task_mode = _validate_data(media, manifest_file)
    if manifest_file:
        assert settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE, \
            "File with meta information can be uploaded if 'Use cache' option is also selected"

    if data['server_files']:
        if db_data.storage == StorageChoice.LOCAL:
            _copy_data_from_share(data['server_files'], upload_dir)
        else:
            upload_dir = settings.SHARE_ROOT

    av_scan_paths(upload_dir)

    job = rq.get_current_job()
    job.meta['status'] = 'Media files are being extracted...'
    job.save_meta()

    db_images = []
    extractor = None

    for media_type, media_files in media.items():
        if media_files:
            if extractor is not None:
                raise Exception('Combined data types are not supported')
            source_paths = [os.path.join(upload_dir, f) for f in media_files]
            if media_type in {'archive', 'zip'
                              } and db_data.storage == StorageChoice.SHARE:
                source_paths.append(db_data.get_upload_dirname())
                upload_dir = db_data.get_upload_dirname()
                db_data.storage = StorageChoice.LOCAL
            extractor = MEDIA_TYPES[media_type]['extractor'](
                source_path=source_paths,
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
            )

    validate_dimension = ValidateDimension()
    if extractor.__class__ == MEDIA_TYPES['zip']['extractor']:
        extractor.extract()
        validate_dimension.set_path(
            os.path.split(extractor.get_zip_filename())[0])
        validate_dimension.validate()
        if validate_dimension.dimension == DimensionType.DIM_3D:
            db_task.dimension = DimensionType.DIM_3D

            extractor.reconcile(
                source_files=list(validate_dimension.related_files.keys()),
                step=db_data.get_frame_step(),
                start=db_data.start_frame,
                stop=data['stop_frame'],
                dimension=DimensionType.DIM_3D,
            )
            extractor.add_files(validate_dimension.converted_files)

    related_images = {}
    if isinstance(extractor, MEDIA_TYPES['image']['extractor']):
        extractor.filter(lambda x: not re.search(
            r'(^|{0})related_images{0}'.format(os.sep), x))
        related_images = detect_related_images(extractor.absolute_source_paths,
                                               upload_dir)

    db_task.mode = task_mode
    db_data.compressed_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' and not data[
        'use_zip_chunks'] else models.DataChoice.IMAGESET
    db_data.original_chunk_type = models.DataChoice.VIDEO if task_mode == 'interpolation' else models.DataChoice.IMAGESET

    def update_progress(progress):
        progress_animation = '|/-\\'
        if not hasattr(update_progress, 'call_counter'):
            update_progress.call_counter = 0

        status_template = 'Images are being compressed {}'
        if progress:
            current_progress = '{}%'.format(round(progress * 100))
        else:
            current_progress = '{}'.format(
                progress_animation[update_progress.call_counter])
        job.meta['status'] = status_template.format(current_progress)
        job.save_meta()
        update_progress.call_counter = (update_progress.call_counter +
                                        1) % len(progress_animation)

    compressed_chunk_writer_class = Mpeg4CompressedChunkWriter if db_data.compressed_chunk_type == DataChoice.VIDEO else ZipCompressedChunkWriter
    if db_data.original_chunk_type == DataChoice.VIDEO:
        original_chunk_writer_class = Mpeg4ChunkWriter
        # Let's use QP=17 (that is 67 for 0-100 range) for the original chunks, which should be visually lossless or nearly so.
        # A lower value will significantly increase the chunk size with a slight increase of quality.
        original_quality = 67
    else:
        original_chunk_writer_class = ZipChunkWriter
        original_quality = 100

    kwargs = {}
    if validate_dimension.dimension == DimensionType.DIM_3D:
        kwargs["dimension"] = validate_dimension.dimension
    compressed_chunk_writer = compressed_chunk_writer_class(
        db_data.image_quality, **kwargs)
    original_chunk_writer = original_chunk_writer_class(original_quality)

    # calculate chunk size if it isn't specified
    if db_data.chunk_size is None:
        if isinstance(compressed_chunk_writer, ZipCompressedChunkWriter):
            w, h = extractor.get_image_size(0)
            area = h * w
            db_data.chunk_size = max(2, min(72, 36 * 1920 * 1080 // area))
        else:
            db_data.chunk_size = 36

    video_path = ""
    video_size = (0, 0)

    def _update_status(msg):
        job.meta['status'] = msg
        job.save_meta()

    if settings.USE_CACHE and db_data.storage_method == StorageMethodChoice.CACHE:
        for media_type, media_files in media.items():

            if not media_files:
                continue

            # replace manifest file (e.g was uploaded 'subdir/manifest.jsonl')
            if manifest_file and not os.path.exists(
                    db_data.get_manifest_path()):
                shutil.copyfile(os.path.join(upload_dir, manifest_file[0]),
                                db_data.get_manifest_path())
                if upload_dir != settings.SHARE_ROOT:
                    os.remove(os.path.join(upload_dir, manifest_file[0]))

            if task_mode == MEDIA_TYPES['video']['mode']:
                try:
                    manifest_is_prepared = False
                    if manifest_file:
                        try:
                            manifest = VideoManifestValidator(
                                source_path=os.path.join(
                                    upload_dir, media_files[0]),
                                manifest_path=db_data.get_manifest_path())
                            manifest.init_index()
                            manifest.validate_seek_key_frames()
                            manifest.validate_frame_numbers()
                            assert len(manifest) > 0, 'No key frames.'

                            all_frames = manifest['properties']['length']
                            video_size = manifest['properties']['resolution']
                            manifest_is_prepared = True
                        except Exception as ex:
                            if os.path.exists(db_data.get_index_path()):
                                os.remove(db_data.get_index_path())
                            if isinstance(ex, AssertionError):
                                base_msg = str(ex)
                            else:
                                base_msg = 'Invalid manifest file was uploaded.'
                                slogger.glob.warning(str(ex))
                            _update_status(
                                '{} Starting to prepare a valid manifest file.'.
                                format(base_msg))

                    if not manifest_is_prepared:
                        _update_status('Preparing a manifest file')
                        manifest = VideoManifestManager(
                            db_data.get_manifest_path())
                        meta_info = manifest.prepare_meta(
                            media_file=media_files[0],
                            upload_dir=upload_dir,
                            chunk_size=db_data.chunk_size)
                        manifest.create(meta_info)
                        manifest.init_index()
                        _update_status('The manifest has been created')

                        all_frames = meta_info.get_size()
                        video_size = meta_info.frame_sizes
                        manifest_is_prepared = True

                    db_data.size = len(range(db_data.start_frame, min(data['stop_frame'] + 1 \
                        if data['stop_frame'] else all_frames, all_frames), db_data.get_frame_step()))
                    video_path = os.path.join(upload_dir, media_files[0])
                except Exception as ex:
                    db_data.storage_method = StorageMethodChoice.FILE_SYSTEM
                    if os.path.exists(db_data.get_manifest_path()):
                        os.remove(db_data.get_manifest_path())
                    if os.path.exists(db_data.get_index_path()):
                        os.remove(db_data.get_index_path())
                    base_msg = str(ex) if isinstance(ex, AssertionError) \
                        else "The uploaded video does not support quick task creation."
                    _update_status(
                        "{} The task will be created using the old method.".format(base_msg))
            else:  # images, archive, pdf
                db_data.size = len(extractor)
                manifest = ImageManifestManager(db_data.get_manifest_path())
                if not manifest_file:
                    if db_task.dimension == DimensionType.DIM_2D:
                        meta_info = manifest.prepare_meta(
                            sources=extractor.absolute_source_paths,
                            meta={
                                k: {
                                    'related_images': related_images[k]
                                }
                                for k in related_images
                            },
                            data_dir=upload_dir)
                        content = meta_info.content
                    else:
                        content = []
                        for source in extractor.absolute_source_paths:
                            name, ext = os.path.splitext(
                                os.path.relpath(source, upload_dir))
                            content.append({
                                'name': name,
                                'meta': {
                                    'related_images':
                                    related_images[''.join((name, ext))]
                                },
                                'extension': ext
                            })
                    manifest.create(content)
                manifest.init_index()
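                # Group consecutive frames into chunks of db_data.chunk_size frames via a shared counter.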
                counter = itertools.count()
                for _, chunk_frames in itertools.groupby(
                        extractor.frame_range,
                        lambda x: next(counter) // db_data.chunk_size):
                    chunk_paths = [(extractor.get_path(i), i)
                                   for i in chunk_frames]
                    img_sizes = []

                    for _, frame_id in chunk_paths:
                        properties = manifest[frame_id]
                        if db_task.dimension == DimensionType.DIM_2D:
                            resolution = (properties['width'],
                                          properties['height'])
                        else:
                            resolution = extractor.get_image_size(frame_id)
                        img_sizes.append(resolution)

                    db_images.extend([
                        models.Image(data=db_data,
                                     path=os.path.relpath(path, upload_dir),
                                     frame=frame,
                                     width=w,
                                     height=h)
                        for (path, frame), (w,
                                            h) in zip(chunk_paths, img_sizes)
                    ])

    if db_data.storage_method == StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
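        # Pre-encode everything now: split frames into chunks of db_data.chunk_size and write
        # both the original and the compressed chunk for each group.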
        counter = itertools.count()
        generator = itertools.groupby(
            extractor, lambda x: next(counter) // db_data.chunk_size)
        for chunk_idx, chunk_data in generator:
            chunk_data = list(chunk_data)
            original_chunk_path = db_data.get_original_chunk_path(chunk_idx)
            original_chunk_writer.save_as_chunk(chunk_data,
                                                original_chunk_path)

            compressed_chunk_path = db_data.get_compressed_chunk_path(
                chunk_idx)
            img_sizes = compressed_chunk_writer.save_as_chunk(
                chunk_data, compressed_chunk_path)

            if db_task.mode == 'annotation':
                db_images.extend([
                    models.Image(data=db_data,
                                 path=os.path.relpath(data[1], upload_dir),
                                 frame=data[2],
                                 width=size[0],
                                 height=size[1])
                    for data, size in zip(chunk_data, img_sizes)
                ])
            else:
                video_size = img_sizes[0]
                video_path = chunk_data[0][1]

            db_data.size += len(chunk_data)
            progress = extractor.get_progress(chunk_data[-1][2])
            update_progress(progress)

    if db_task.mode == 'annotation':
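        # Bulk-create the deferred Image rows, then link related files (e.g. context images)
        # to their primary images.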
        models.Image.objects.bulk_create(db_images)
        created_images = models.Image.objects.filter(data_id=db_data.id)

        db_related_files = [
            RelatedFile(data=image.data,
                        primary_image=image,
                        path=os.path.join(upload_dir, related_file_path))
            for image in created_images
            for related_file_path in related_images.get(image.path, [])
        ]
        RelatedFile.objects.bulk_create(db_related_files)
        db_images = []
    else:
        models.Video.objects.create(data=db_data,
                                    path=os.path.relpath(
                                        video_path, upload_dir),
                                    width=video_size[0],
                                    height=video_size[1])

    if db_data.stop_frame == 0:
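        # No explicit stop frame: derive it from the number of imported frames and the frame step.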
        db_data.stop_frame = db_data.start_frame + (
            db_data.size - 1) * db_data.get_frame_step()
    else:
        # validate stop_frame
        db_data.stop_frame = min(db_data.stop_frame, \
            db_data.start_frame + (db_data.size - 1) * db_data.get_frame_step())

    preview = extractor.get_preview()
    preview.save(db_data.get_preview_path())

    slogger.glob.info("Found frames {} for Data #{}".format(
        db_data.size, db_data.id))
    _save_task_to_db(db_task)
Example #58
0
    def train_script_wrapper(cls,
                             project,
                             label_config,
                             train_kwargs,
                             initialization_params=None,
                             tasks=()):

        if initialization_params:
            # Reinitialize the class instance for use in the RQ worker context
            cls.initialize(**initialization_params)

        # fetching the latest model version before we generate the next one
        t = time.time()
        m = cls.fetch(project, label_config)
        m.is_training = True

        version = cls._generate_version()

        if cls.model_dir:
            logger.debug('Running in model dir: ' + cls.model_dir)
            project_model_dir = os.path.join(cls.model_dir, project or '')
            workdir = os.path.join(project_model_dir, version)
            os.makedirs(workdir, exist_ok=True)
        else:
            logger.debug('Running without model dir')
            workdir = None

        if cls.without_redis():
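            # Without Redis the tasks are passed in directly; otherwise they are read from a
            # per-project Redis list.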
            data_stream = tasks
        else:
            data_stream = (
                json.loads(t)
                for t in cls._redis.lrange(cls._get_tasks_key(project), 0, -1))

        if workdir:
            data_stream, snapshot = tee(data_stream)
            cls.create_data_snapshot(snapshot, workdir)

        try:
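            # Fit the model, then record the result as job_result.json in the workdir and,
            # when Redis is used, in a per-project results list.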
            train_output = m.model.fit(data_stream, workdir, **train_kwargs)
            if cls.without_redis():
                job_id = None
            else:
                job_id = get_current_job().id
            job_result = json.dumps({
                'status': 'ok',
                'train_output': train_output,
                'project': project,
                'workdir': workdir,
                'version': version,
                'job_id': job_id,
                'time': time.time() - t
            })
            if workdir:
                job_result_file = os.path.join(workdir, 'job_result.json')
                with open(job_result_file, mode='w') as fout:
                    fout.write(job_result)
            if not cls.without_redis():
                cls._redis.rpush(cls._get_job_results_key(project), job_result)
        finally:
            m.is_training = False
        return job_result
Example #59
0
def check_dependencies_are_met():
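    # True when all of this job's dependencies have completed (RQ's built-in check).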
    return get_current_job().dependencies_are_met()
Example #60
0
def save_key_ttl(key):
    # Stores key ttl in meta
    job = get_current_job()
    ttl = job.connection.ttl(key)
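    # Note: assigning to job.meta replaces any existing meta dict before save_meta() persists it.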
    job.meta = {'ttl': ttl}
    job.save_meta()