def mock_run_script(self, *args, **kw):
    # simulate a script run
    wrapper, params, context = args
    # 1. write file into results_dir
    tmpdir = urlsplit(params['result']['results_dir']).path
    try:
        # 2. create some result files
        for fname in ('model.RData', 'proj_test.tif'):
            img = Image.new('F', (10, 10))
            img.save(os.path.join(tmpdir, fname), 'TIFF')
        # 3. store results
        items = [
            {
                'file': {
                    'url': 'file://{}/model.RData'.format(tmpdir),
                    'contenttype': 'application/x-r-data',
                    'filename': 'model.RData',
                },
                'title': 'Model Title',
                'description': 'Model Description',
                'bccvlmetadata': {
                    'genre': 'DataGenreSDMModel',
                },
                'filemetadata': {},
                'layermd': {}
            },
            {
                'file': {
                    'url': 'file://{}/proj_test.tif'.format(tmpdir),
                    'contenttype': 'image/tiff',
                    'filename': 'proj_test.tif',
                },
                'title': 'Test Projection',
                'description': 'Test Projection Description',
                'bccvlmetadata': {
                    'genre': 'DataGenreCP',
                },
                'filemetadata': {
                    'band': [{
                        'min': 0.0,
                        'STATISTICS_MINIMUM': 0.0,
                        'max': 1.0
                    }]
                },
                'layermd': {'files': {'proj_test.tif': {'layer': 'projection_probability',
                                                        'data_type': 'Continuous'}}}
            }
        ]
        # TODO: tasks called directly here; maybe call them as tasks as
        #       well? (chain?)
        import_result_job(items, params['result']['results_dir'],
                          context).delay()
        import_cleanup(params['result']['results_dir'], context)
        set_progress('COMPLETED', 'Test Task succeeded', None, context)
    except Exception as e:
        # 4. clean up if there was a problem, otherwise the import task cleans up
        # TODO: should be done by errback or whatever
        import_cleanup(params['result']['results_dir'], context)
        set_progress('FAILED', 'Test Task failed', None, context)
        raise

def mock_run_script(self, *args, **kw):
    # simulate a script run
    wrapper, params, context = args
    # 1. write file into results_dir
    tmpdir = urlsplit(params['result']['results_dir']).path
    try:
        # 2. create some result files
        for fname in ('model.RData', 'traits.txt'):
            open(os.path.join(tmpdir, fname), 'w').write('Mock Result')
        # 3. store results
        items = [
            {
                'file': {
                    'url': 'file://{}/model.RData'.format(tmpdir),
                    'contenttype': 'application/x-r-data',
                    'filename': 'model.RData',
                },
                'title': 'Model Title',
                'description': 'Model Description',
                'bccvlmetadata': {
                    'genre': 'DataGenreSTModel',
                },
                'filemetadata': {},
                'layermd': {}
            },
            {
                'file': {
                    'url': 'file://{}/traits.txt'.format(tmpdir),
                    'contenttype': 'text/plain',
                    'filename': 'traits.txt',
                },
                'title': 'Test Traits',
                'description': 'Test Traits Description',
                'bccvlmetadata': {
                    'genre': 'DataGenreSTResult',
                },
                'filemetadata': {},
                'layermd': {},
            }
        ]
        # TODO: tasks called directly here; maybe call them as tasks as
        #       well? (chain?)
        import_result_job(items, params['result']['results_dir'],
                          context).delay()
        import_cleanup(params['result']['results_dir'], context)
        set_progress('COMPLETED', 'Test Task succeeded', None, context)
    except Exception as e:
        # 4. clean up if there was a problem, otherwise the import task cleans up
        # TODO: should be done by errback or whatever
        import_cleanup(params['result']['results_dir'], context)
        set_progress('FAILED', 'Test Task failed', None, context)
        raise

def mock_run_script(self, *args, **kw):
    # simulate a script run
    wrapper, params, context = args
    # 1. write file into results_dir
    tmpdir = urlsplit(params['result']['results_dir']).path
    try:
        # 2. create some result files
        for fname in ('ensemble.tif',):
            img = Image.new('F', (10, 10))
            img.save(os.path.join(tmpdir, fname), 'TIFF')
        # 3. store results
        items = [
            {
                'file': {
                    'url': 'file://{}/ensemble.tif'.format(tmpdir),
                    'contenttype': 'image/tiff',
                    'filename': 'ensemble.tif',
                },
                'title': 'Ensemble Output',
                'description': 'Ensemble Output Description',
                'bccvlmetadata': {
                    'genre': 'DataGenreEnsembleResult',
                },
                'filemetadata': {
                    'band': [{
                        'min': 0.0,
                        'STATISTICS_MINIMUM': 0.0,
                        'max': 1.0
                    }]
                },
                'layermd': {},
            }
        ]
        # TODO: tasks called directly here; maybe call them as tasks as
        #       well? (chain?)
        import_result_job(items, params['result']['results_dir'],
                          context).delay()
        import_cleanup(params['result']['results_dir'], context)
        set_progress('COMPLETED', 'Test Task succeeded', None, context)
    except Exception as e:
        # 4. clean up if there was a problem, otherwise the import task cleans up
        # TODO: should be done by errback or whatever
        import_cleanup(params['result']['results_dir'], context)
        set_progress('FAILED', 'Test Task failed', None, context)
        raise

def mock_run_script(self, *args, **kw):
    # simulate a script run
    wrapper, params, context = args
    # 1. write file into results_dir
    tmpdir = urlsplit(params['result']['results_dir']).path
    try:
        # 2. create some result files
        for fname in ('model.RData', 'proj_test.tif'):
            img = Image.new('F', (10, 10))
            img.save(os.path.join(tmpdir, fname), 'TIFF')
        # 3. store results
        items = [
            {
                'file': {
                    'url': 'file://{}/model.RData'.format(tmpdir),
                    'contenttype': 'application/x-r-data',
                    'filename': 'model.RData',
                },
                'title': 'Model Title',
                'description': 'Model Description',
                'bccvlmetadata': {
                    'genre': 'DataGenreSDMModel',
                },
                'filemetadata': {},
                'layermd': {}
            },
            {
                'file': {
                    'url': 'file://{}/proj_test.tif'.format(tmpdir),
                    'contenttype': 'image/tiff',
                    'filename': 'proj_test.tif',
                },
                'title': 'Test Projection',
                'description': 'Test Projection Description',
                'bccvlmetadata': {
                    'genre': 'DataGenreCP',
                },
                'filemetadata': {
                    'band': [{
                        'min': 0.0,
                        'STATISTICS_MINIMUM': 0.0,
                        'max': 1.0
                    }]
                },
                'layermd': {'files': {'proj_test.tif': {'layer': 'projection_probability',
                                                        'data_type': 'Continuous'}}}
            },
            {
                'file': {
                    'url': 'file://{}/proj_test.tif'.format(tmpdir),
                    'contenttype': 'image/tiff',
                    'filename': 'proj_test.tif',
                },
                'title': 'Test Envelop Projection',
                'description': 'Test Envelop Projection Description',
                'bccvlmetadata': {
                    'genre': 'DataGenreCP_ENVLOP',
                },
                'filemetadata': {
                    'band': [{
                        'min': 0.0,
                        'STATISTICS_MINIMUM': 0.0,
                        'max': 1.0
                    }]
                },
                'layermd': {'files': {'proj_test.tif': {'layer': 'projection_probability',
                                                        'data_type': 'Continuous'}}}
            }
        ]
        # TODO: tasks called directly here; maybe call them as tasks as
        #       well? (chain?)
        import_result_job(items, params['result']['results_dir'],
                          context).delay()
        import_cleanup(params['result']['results_dir'], context)
        set_progress('COMPLETED', 'Test Task succeeded', None, context)
    except Exception as e:
        # 4. clean up if there was a problem, otherwise the import task cleans up
        # TODO: should be done by errback or whatever
        import_cleanup(params['result']['results_dir'], context)
        set_progress('FAILED', 'Test Task failed', None, context)
        raise

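# Note: the mock_run_script variants above are meant to replace the real
# compute task in unit tests, so that an experiment run produces predictable
# result artefacts and import jobs without executing any R scripts. Below is
# a minimal, hypothetical sketch of how such a mock could be wired in with
# mock.patch; the patch target string and the test class are assumptions for
# illustration only, not the actual test setup of this package.
import unittest

import mock


class ExampleExperimentTest(unittest.TestCase):

    def test_sdm_run(self):
        # patch the (assumed) task entry point so that calling it produces
        # the mocked result files and triggers the import/cleanup jobs
        with mock.patch('org.bccvl.tasks.compute.run_script',
                        side_effect=mock_run_script):
            pass  # ... submit the experiment and assert on imported datasets
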
def run_script(wrapper, params, context):
    # TODO: there are many little things that can fail here, and we
    #       need to communicate it properly back to the user.
    # TODO: however, we can't really do anything in case sending
    #       messages doesn't work.
    items = []
    try:
        errmsg = 'Fail to transfer/import data'
        set_progress('RUNNING', 'Transferring data', None, context)
        # create initial folder structure
        create_workenv(params)
        # transfer input files
        transfer_inputs(params, context)
        # create script
        scriptname = create_scripts(params, context)

        # run the script
        errmsg = 'Fail to run experiment'
        set_progress('RUNNING', 'Executing job', None, context)
        scriptout = os.path.join(params['env']['outputdir'],
                                 params['worker']['script']['name'] + 'out')
        outfile = open(scriptout, 'w')
        wrapsh = os.path.join(params['env']['scriptdir'], 'wrap.sh')
        open(wrapsh, 'w').write(wrapper)
        # zip up workenv if requested
        if params['worker'].get('zipworkenv', False):
            # make sure tmp is big enough
            # TODO: add toolkit name to zip name ... workenv_bioclim.zip
            zip_folder(os.path.join(params['env']['outputdir'], 'workenv.zip'),
                       params['env']['workdir'])
        cmd = ["/bin/bash", "-l", "wrap.sh", scriptname]
        LOG.info("Executing: %s", ' '.join(cmd))
        proc = subprocess.Popen(cmd, cwd=params['env']['scriptdir'],
                                close_fds=True, stdout=outfile,
                                stderr=subprocess.STDOUT)
        rpid, ret, rusage = os.wait4(proc.pid, 0)
        # TODO: should we write this as json file and send as result back
        #       or just send rusage with finished message?
        usage = get_rusage(rusage)
        # TODO: check whether ret and proc.returncode are the same

        # move results back
        errmsg = 'Fail to transfer results back'
        set_progress('RUNNING', 'Transferring outputs', usage, context)
        # TODO: maybe redesign this?
        #       transfer only uploads to destination and stores new url somewhere
        #       and we do metadata extraction and item creation afterwards (here)?
        items = transfer_outputs(params, context)

        # we are done here, hand over to result importer
        # build a chain of the remaining tasks
        start_import = set_progress_job(
            'RUNNING', 'Import results', None, context)

        cleanup_job = import_cleanup_job(
            params['result']['results_dir'], context)
        import_job = import_result_job(items, params['result']['results_dir'],
                                       context)
        import_job.link_error(set_progress_job(
            'FAILED', 'Result import failed', None, context))
        import_job.link_error(cleanup_job)

        if ret != 0:
            errmsg = 'Script execution failed with exit code {0}'.format(ret)
            finish_job = set_progress_job('FAILED', errmsg, None, context)
        else:
            finish_job = set_progress_job(
                'COMPLETED', 'Task succeeded', None, context)

        (start_import | import_job | cleanup_job | finish_job).delay()

    except Exception as e:
        # TODO: capture stacktrace
        # need to start import to get import cleaned up

        # Log error message with stacktrace.
        # :( exposes internals, ugly hash, complicated with admin only access
        # -> certainly need to get rid of exception in message.
        # test exceptions:
        #   ... upload file, replace with something else (unzip error)
        #   ... delete file and rerun experiment (download error)
        #   ... create file/folder error? (can't write log)
        #   ... how to simulate fault? (download error)

        # log error message with exception and traceback
        LOG.error(errmsg, exc_info=True)

        start_import = set_progress_job(
            'RUNNING', 'Import results', None, context)
        import_job = import_result_job(items, params['result']['results_dir'],
                                       context)
        import_job.link_error(set_progress_job(
            'FAILED', 'Result import failed', None, context))
        finish_job = set_progress_job('FAILED', errmsg, None, context)
        (start_import | import_job | finish_job).delay()
        raise
    finally:
        # TODO: check if dir exists
        path = params['env'].get('workdir', None)
        if path and os.path.exists(path):
            shutil.rmtree(path)

def import_multi_species_csv(url, results_dir, import_context, context):
    # url .... source file
    # results_dir ... folder to place split files into
    # context ... the context with user and orig dataset
    tmpdir = None
    try:
        set_progress('RUNNING', 'Split {0}'.format(url), None, context)
        # step 1: update main dataset metadata
        tmpdir = tempfile.mkdtemp()
        userid = context.get('user', {}).get('id')
        settings = app.conf.get('bccvl', {})
        src = build_source(url, userid, settings)
        dst = build_destination('file://{}'.format(tmpdir), settings)
        movelib.move(src, dst)

        # Get the downloaded filename
        tmpfile = glob.glob(os.path.join(tmpdir, '*'))[0]

        # Extract occurrence file from downloaded file
        mimetype, enc = mimetypes.guess_type(tmpfile)
        if mimetype == 'application/zip':
            src_occ_data = os.path.join('data', 'ala_occurrence.csv')
            with zipfile.ZipFile(tmpfile, 'r') as zipf:
                occfile = os.path.join(tmpdir, src_occ_data)
                zipf.extract(src_occ_data, tmpdir)
            item = {
                'filemetadata': extract_metadata(tmpfile, 'application/zip')
            }
            occmd = item['filemetadata'].get(src_occ_data, {}).get('metadata', {})
        else:
            # csv file
            item = {
                'filemetadata': extract_metadata(tmpfile, "text/csv")
            }
            occfile = tmpfile
            occmd = item['filemetadata']

        # Check that there are lon and lat columns
        # if upload is of type csv, we validate column names as well
        if ('headers' not in occmd
                or 'lat' not in occmd['headers']
                or 'lon' not in occmd['headers']):
            raise Exception("Missing 'lat'/'lon' column")

        set_progress('RUNNING',
                     'Import metadata for {0}'.format(url), None, context)
        import_md_job = import_file_metadata_job([item], url, context)
        import_md_job.link_error(set_progress_job(
            "FAILED", "Metadata update failed for {0}".format(url),
            None, context))

        # step 2: split csv file and create sub datasets
        # start reading csv file and create new datasets which will be
        # linked up with dataset collection item
        # FIXME: large csv files should be streamed to separate files
        #        (not read into ram like here)
        f = io.open(occfile, 'r', encoding='utf-8', errors='ignore')
        csvreader = UnicodeCSVReader(f)
        headers = csvreader.next()
        if 'species' not in headers:
            raise Exception('missing species column')
        speciesidx = headers.index('species')
        # create dict with all data .... species column used as key, and rest
        # is just added
        data = {}
        for row in csvreader:
            if not row:
                continue
            species = row[speciesidx]
            if species not in data:
                # create new entry for species
                fname = u'{0}.csv'.format(species).replace(
                    u'/', u'_').encode('idna')
                # TODO: make sure fname contains only legal filename characters
                fpath = os.path.join(tmpdir, fname)
                file = io.open(fpath, 'wb')
                fwriter = UnicodeCSVWriter(file)
                fwriter.writerow(headers)
                data[species] = {
                    'file': file,
                    'writer': fwriter,
                    'path': fpath,
                    'name': fname
                }
            data[species]['writer'].writerow(row)
        # ok we have got all data and everything in separate files
        # close all files
        for species in data:
            data[species]['file'].close()
            del data[species]['file']
            del data[species]['writer']
        # extract metadata
        for species in data:
            data[species]['filemetadata'] = extract_metadata(
                data[species]['path'],
                'text/csv'
            )
        # send files to destination
        for species in data:
            src = build_source('file://{}'.format(data[species]['path']))
            dst = build_destination(
                os.path.join(results_dir, data[species]['name']),
                app.conf.get('bccvl', {}))
            data[species]['url'] = dst['url']
            movelib.move(src, dst)
        # all files uploaded .... send import jobs
        set_progress('RUNNING',
                     'Create datasets for {0}'.format(url), None, context)
        items = []
        for species in data:
            # build item
            item = {
                'title': u'{0} occurrences'.format(species),
                'description': '',
                'file': {
                    'url': data[species]['url'],
                    'filename': data[species]['name'],
                    'contenttype': 'text/csv',
                },
                'bccvlmetadata': {
                    'genre': 'DataGenreSpeciesOccurrence',
                    'categories': ['occurrence'],
                    'species': {
                        'scientificName': species,
                    }
                },
                'filemetadata': data[species]['filemetadata'],
                '_partof': {
                    # add back reference to orig dataset
                    # TODO: shouldn't use absolute path here
                    'path': context['context']
                }
            }
            items.append(item)
        # start import process
        start_import = set_progress_job(
            'RUNNING', 'Import results', None, context)
        # What is results_dir being used for?
        import_job = import_result_job(items, results_dir, import_context)
        cleanup_job = import_cleanup_job(results_dir, context)
        import_job.link_error(set_progress_job(
            'FAILED', 'Multi species import failed', None, context))
        import_job.link_error(cleanup_job)
        finish_job = set_progress_job(
            'COMPLETED', 'Task succeeded', None, context)
        (start_import | import_md_job | import_job |
         cleanup_job | finish_job).delay()
        # FIXME: missing stuff ...
        #        need to set multi species collection to finished at some stage
    except Exception as e:
        set_progress('FAILED',
                     'Error while splitting Multi Species CSV {}: {}'.format(
                         url, e),
                     None, context)
        LOG.error('Multi species split for %s failed: %s', url, e,
                  exc_info=True)
    finally:
        if tmpdir and os.path.exists(tmpdir):
            shutil.rmtree(tmpdir)

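# Note: the TODO inside import_multi_species_csv points out that the
# per-species filename is not guaranteed to contain only legal filename
# characters (only '/' is replaced). A small, hypothetical helper along the
# lines below could be used instead of the inline replace(); the character
# whitelist is an assumption for illustration, not part of the original code.
import re


def sanitize_species_filename(species):
    # keep letters, digits, dot, dash and underscore; collapse any other
    # run of characters (spaces, slashes, quotes, ...) into one underscore
    safe = re.sub(u'[^A-Za-z0-9._-]+', u'_', species).strip(u'_')
    return u'{0}.csv'.format(safe or u'species')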