def best_parameters_callback(job, parameters=None):

    if not np.any(np.isnan(job.get_result())) and (np.any(np.isnan(parameters['result'])) or (job.get_result() <= parameters['result'])):
        # New best parameter!
        parameters['result'] = job.get_result()
        parameters['job_name'] = job.job_name
        parameters['parameters'] = job.experiment_parameters
        parameters['best_parameters'] = utils.subdict(job.experiment_parameters, dict_parameters_range.keys())

        print "\n\n>>>>>> Found new best parameters: \n%s %s %s\n\n" % (parameters['best_parameters'], parameters['result'], parameters['job_name'])

        np.save('./outputs/best_params', dict(parameters=parameters))

        # If desired, automatically create additional plots.
        if parameters.get('submit_best', False):

            pbs_submission_infos_copy = parameters['pbs_submission_infos_copy']
            try:
                # Will check the best fitting parameters, and relaunch simulations for them, in order to get new cool plots.
                curr_params_label = '_'.join(["%s%.2f" % (k.replace('_', ''), v) for k, v in parameters['best_parameters'].iteritems()])

                ## First do Memory curves + EM Fits
                pbs_submission_infos_copy['other_options'].update(dict(
                    action_to_do='launcher_do_memory_curve_marginal_fi_withplots_live',
                    subaction='collect_responses',
                    inference_method='sample',
                    N=300,
                    T=6,
                    num_samples=300,
                    output_directory=os.path.join(simul_out_dir, 'outputs'),
                    selection_method='last',
                    num_repetitions=10,
                    burn_samples=200,
                    stimuli_generation='random',
                    stimuli_generation_recall='random',
                    session_id='cmaes_bays09_7try_rerun_080816',
                    result_computation='filenameoutput',
                    label='%s_cmaes_bays09_7try_080816' % (curr_params_label)
                ))
                pbs_submission_infos_copy['walltime'] = '40:00:00'
                pbs_submission_infos_copy['submit_label'] = 'bestparam_rerun'

                submit_pbs = submitpbs.SubmitPBS(pbs_submission_infos=pbs_submission_infos_copy, debug=True)

                # Extract the parameters to try
                best_params_resend = [utils.subdict(job.experiment_parameters, dict_parameters_range.keys())]

                # Submit without waiting
                print "Submitting extra job for Plots, parameters:", best_params_resend
                submission_parameters_dict = dict(pbs_submission_infos=pbs_submission_infos_copy, submit_jobs=submit_jobs, wait_jobs_completed=False)
                submit_pbs.submit_minibatch_jobswrapper(best_params_resend, submission_parameters_dict)

            except Exception as e:
                print "Failure while submitting sub-task for best parameter. Continuing anyway."
                print parameters
                print e
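
All of the examples collected here revolve around a small subdict helper (several also use renkeys and delall). Its exact behaviour differs between the projects sampled: the _tracks example further down expects subdict to raise KeyError when a required key is missing, while import_article treats missing keys as simply absent. A minimal sketch of what such helpers typically look like, with names and defaults that are assumptions rather than any one project's API:

def subdict(d, keys, exclude=()):
    """Return a new dict holding only `keys` from `d`.

    Assumed behaviour: missing keys are silently skipped. Some of the
    projects above use a stricter variant that raises KeyError instead.
    """
    return dict((k, d[k]) for k in keys if k in d and k not in exclude)

def renkeys(d, renames):
    """Return a copy of `d` with each (old, new) key pair renamed."""
    d = dict(d)
    for old, new in renames:
        if old in d:
            d[new] = d.pop(old)
    return d

def delall(d, keys):
    """Delete `keys` from `d` in place, ignoring keys that are absent."""
    for k in keys:
        d.pop(k, None)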
Example #2
def import_article(journal, article_data, create=True, update=False):
    if not article_data or not isinstance(article_data, dict):
        raise ValueError("given data to import is empty/invalid")
    expected_keys = ['doi', 'volume', 'path', 'article-type', 'manuscript_id']

    # data wrangling
    try:
        kwargs = subdict(article_data, expected_keys)

        # JATS XML doesn't contain the manuscript ID. derive it from doi
        if 'manuscript_id' not in kwargs and 'doi' in kwargs:
            kwargs['manuscript_id'] = doi2msid(kwargs['doi'])

        elif 'doi' not in kwargs and 'manuscript_id' in kwargs:
            kwargs['doi'] = msid2doi(kwargs['manuscript_id'])

        context = {'article': kwargs['doi']}

        LOG.info("importing Article", extra=context)

        # post process data
        kwargs.update({
            'journal': journal,
            'volume': int(kwargs['volume']),
            'type': kwargs['article-type'],
        })
        delall(kwargs, ['path', 'article-type'])
    except KeyError:
        raise ValueError("expected keys invalid/not present: %s" % ", ".join(expected_keys))

    # attempt to insert
    article_key = subdict(kwargs, ['doi', 'version'])
    try:
        article_obj = models.Article.objects.get(**article_key)
        avobj = import_article_version(article_obj, article_data, create, update)
        LOG.info("Article exists, updating", extra=context)
        for key, val in kwargs.items():
            setattr(article_obj, key, val)
        article_obj.save()
        return article_obj, avobj

    except models.Article.DoesNotExist:
        # we've been told not to create new articles.
        # this is now a legitimate exception
        if not create:
            raise
    article_obj = models.Article(**kwargs)
    article_obj.save()
    avobj = import_article_version(article_obj, article_data, create, update)
    LOG.info("created new Article %s" % article_obj)
    return article_obj, avobj
Example #3
def expand_videos(msid, video):
    gc_data = metadata(msid)  # cached on first hit
    gc_id_str = ", ".join(gc_data.keys())

    v_id = video['id']
    ensure(v_id in gc_data,
           "glencoe doesn't know %r, only %r" % (v_id, gc_id_str))

    video_data = gc_data[v_id]
    video_data = utils.subdict(video_data, ['jpg_href', 'width', 'height'])
    video_data = utils.renkeys(video_data, [('jpg_href', 'image')])

    func = lambda mtype: {
        'mediaType': SOURCES[mtype],
        'uri': gc_data[v_id][mtype + "_href"]
    }
    video_data['sources'] = lmap(func, SOURCES)
    video.update(video_data)

    del video['uri']  # returned by elife-tools, not part of spec

    # Add placeholder, the video thumbnail image
    video["placeholder"] = {}
    video["placeholder"]["uri"] = video["image"].split('/')[-1]
    video["placeholder"]["alt"] = ""

    return video
def expand_videos(msid, video):
    gc_data = metadata(msid) # cached on first hit
    gc_id_str = ", ".join(gc_data.keys())

    v_id = video['id']
    ensure(gc_data, "glencoe doesn't know %r, it doesn't have any media" % msid)
    ensure(v_id in gc_data, "glencoe doesn't know %r, only %r" % (v_id, gc_id_str))

    video_data = gc_data[v_id]
    video_data = utils.subdict(video_data, ['jpg_href', 'width', 'height'])
    video_data = utils.renkeys(video_data, [('jpg_href', 'image')])

    func = lambda mtype: OrderedDict([
        ('mediaType', SOURCES[mtype]),
        ('uri', gc_data[v_id][mtype + "_href"])
    ])
    video_data['sources'] = lmap(func, SOURCES)
    video.update(video_data)

    del video['uri'] # returned by elife-tools, not part of spec

    # Add placeholder, the video thumbnail image
    video["placeholder"] = OrderedDict()
    video["placeholder"]["uri"] = video["image"].split('/')[-1]
    video["placeholder"]["alt"] = ""

    return video
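
Both expand_videos variants above depend on a module-level SOURCES mapping (Glencoe media-type key to MIME type) and an lmap helper. Their real definitions live elsewhere in that project; the shapes below are inferred from the calls gc_data[v_id][mtype + "_href"] and SOURCES[mtype], and should be read as assumptions:

# Assumed shapes, inferred from the two functions above; the real values
# belong to the project's own module and may differ.
SOURCES = {
    'mp4': 'video/mp4',
    'webm': 'video/webm',
    'ogv': 'video/ogg',
}

def lmap(func, iterable):
    "map() that always returns a list, regardless of Python version."
    return list(map(func, iterable))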
Example #5
def mkresponse(status, message, request={}, **kwargs):
    packet = {
        "status": status,
        "message": message,
        "id": None,
        "token": None,
        "datetime": datetime.now(),
    }

    request = subdict(request, ['id', 'token', 'action'])
    packet.update(request)

    # merge in any explicit overrides
    packet.update(kwargs)

    # more response wrangling
    packet = renkeys(packet, [("action", "requested-action"),
                              ("dry-run", "validate-only")])

    # remove any keys not supported in the schema
    supported_keys = conf.RESPONSE_SCHEMA['properties'].keys()
    packet = subdict(packet, supported_keys)

    # wrangle log context
    context = renkeys(packet, [("message", "status-message")])
    levels = {
        INVALID: logging.ERROR,
        ERROR: logging.ERROR,
        VALIDATED: logging.INFO,
        INGESTED: logging.DEBUG,
        PUBLISHED: logging.DEBUG
    }
    LOG.log(levels[packet["status"]],
            "%s response",
            packet['status'],
            extra=context)

    # success messages are None
    if not packet['message']:
        del packet['message']

    # double-publications are successful
    if kwargs.get('code') == 'already-published':
        packet['status'] = PUBLISHED

    return packet
Example #7
def _get_or_create(session, model, fbid, **kwargs):
    instance = session.query(model).filter_by(fbid=fbid).first()
    created = False
    if not instance:
        created = True
        data = subdict(kwargs, keys=model.__table__.columns.keys(), exclude=('id',))
        instance = model(fbid=fbid, **data)
        session.add(instance)
    return instance, created
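
For context, the _add_comments and _add_post examples further down call this helper roughly as follows; the session, the User model and the literal values here are illustrative stand-ins borrowed from those examples:

# Illustrative usage, mirroring the call pattern in _add_comments below.
commenter, created = _get_or_create(session, User, fbid='12345')
if created:
    commenter.name = 'Example User'
    session.flush()  # make commenter.id available for use as a foreign key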
Example #8
def patch2(patch_data):
    Model = getattr(models, patch_data['model'])
    key_list = patch_data['-key']
    val_list = subdict(patch_data, key_list)
    try:
        utils.create_or_update(Model, patch_data, key_list, create=False, update=True)
        LOG.info("successfully patched %s", val_list)
        return True
    except Model.DoesNotExist:
        LOG.warn("%s not found, skipping patch", patch_data['model'])
        return False
    except Exception as err:
        LOG.error("unhandled exception attempting to patch %s: %s", val_list, err)
    def phi(self, x, y):
        _frx = freeze(x), freeze(y)
        if _frx not in self._phis:
            ykeys = ['x', 'y', 'dx', 'dy']
            _phi = pymzn.minizinc(
                self.mzn_phi,
                output_vars=['phi'],
                data={
                    **self.inputize(subdict(y, ykeys), ykeys),
                    **x
                },
                solver=pymzn.opturion)[0]['phi']
            self._phis[_frx] = np.array(_phi, dtype=np.float64)
        return self._phis[_frx]
Example #11
def _add_comments(session, post_id, comment_data):
    # Get or create commenter.
    commenter_data = comment_data['from']
    commenter, created = _get_or_create(
            session, User, fbid=commenter_data['id'])
    if created:
        commenter.name = commenter_data['name']
        session.flush()
    # Initialize comment to save it further.
    comment_fbid = comment_data['id']
    comment_data = subdict(
            comment_data,
            keys=Comment.__table__.columns.keys(),
            exclude=('id',))
    comment_data['created_time'] = convert_date(comment_data['created_time'])
    session.add(Comment(from_id=commenter.id, fbid=comment_fbid, post_id=post_id, **comment_data))
Example #12
def _clean_post_data(data_dict):
    """
    Extract post data from data dict.
    """
    data = subdict(data_dict, keys=Post.__table__.columns.keys())
    data['fbid'] = data['id']
    del data['id']
    if 'shares' in data:
        data['shares'] = data['shares']['count']
    data['created_time'] = convert_date(data['created_time'])
    data['updated_time'] = convert_date(data['updated_time'])
    if 'privacy' in data_dict:
        privacy_data = data_dict['privacy']
        if 'value' in privacy_data:
            data['privacy_value'] = privacy_data['value']
        if 'allow' in privacy_data:
            data['allow'] = privacy_data['allow']
        if 'deny' in privacy_data:
            data['deny'] = privacy_data['deny']
    return data
    def _tracks(files):
        """Returns a sorted list of tracks given Archive.org item
        (concert) metadata files
        """
        def sort_tracks(tracks):
            for i in range(len(tracks)):
                try:
                    tracks[i]['track'] = int(tracks[i].get('track', "1")
                                             .split("/")[0])
                except:
                    tracks[i]['track'] = 1
            return sorted(tracks, key=lambda t: t['track'])

        def get_filetype(files):
            available = set(f.get('name', '').lower()
                            .rsplit('.', 1)[-1] for f in files)
            return next(ft if ft in available else
                        False for ft in FILETYPE_PRIORITY)

        ts = []
        filetype = get_filetype(files)

        if not filetype:
            return {}  # better error handling required

        for f in files:
            try:
                track = subdict(f, REQUIRED_KEYS)
            except KeyError as e:
                continue  # Skip if track doesn't have required keys

            if track['name'].endswith(filetype):
                ts.append(track)

        try:
            return sort_tracks(ts)
        except ValueError as e:
            print(e)
        return ts
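
The _tracks helper above relies on two constants defined elsewhere: FILETYPE_PRIORITY, an ordered sequence of preferred audio file extensions, and REQUIRED_KEYS, the metadata fields every track entry must carry (its subdict raises KeyError otherwise and the track is skipped). Plausible shapes, stated purely as assumptions:

# Hypothetical values for illustration only; the real constants are defined
# in the surrounding module and may differ.
FILETYPE_PRIORITY = ('flac', 'mp3', 'ogg')   # best format first
REQUIRED_KEYS = ('name', 'title', 'track')   # minimal per-track metadata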
def best_parameters_callback(job, parameters=None):

    if not np.any(np.isnan(job.get_result())) and (
        np.any(np.isnan(parameters["result"])) or (job.get_result() <= parameters["result"])
    ):
        # New best parameter!
        parameters["result"] = job.get_result()
        parameters["job_name"] = job.job_name
        parameters["parameters"] = job.experiment_parameters
        parameters["best_parameters"] = utils.subdict(job.experiment_parameters, dict_parameters_range.keys())

        print "\n\n>>>>>> Found new best parameters: \n%s %s %s\n\n" % (
            parameters["best_parameters"],
            parameters["result"],
            parameters["job_name"],
        )

        np.save("./outputs/best_params", dict(parameters=parameters))

        # If desired, automatically create additional plots.
        if parameters.get("submit_best", False):

            pbs_submission_infos_copy = parameters["pbs_submission_infos_copy"]
            try:
                # Will check the best fitting parameters, and relaunch simulations for them, in order to get new cool plots.

                ## First do Memory curves + EM Fits
                pbs_submission_infos_copy["other_options"].update(
                    dict(
                        action_to_do="launcher_do_memory_curve_marginal_fi_withplots_live",
                        subaction="collect_responses",
                        inference_method="sample",
                        N=300,
                        num_samples=300,
                        M=M,
                        output_directory=os.path.join(simul_out_dir, "outputs"),
                        selection_method="last",
                        num_repetitions=3,
                        burn_samples=200,
                        stimuli_generation="random",
                        stimuli_generation_recall="random",
                        session_id="cmaes_fitting_experiments_relaunchs",
                        result_computation="filenameoutput",
                        label="cmaes_ratiosigmaxsigmaoutput_fitting_experiment_rerun_280314",
                    )
                )
                pbs_submission_infos_copy["walltime"] = "80:00:00"
                pbs_submission_infos_copy["submit_label"] = "bestparam_rerun"

                submit_pbs = submitpbs.SubmitPBS(pbs_submission_infos=pbs_submission_infos_copy, debug=True)

                # Extract the parameters to try
                best_params_resend = [utils.subdict(job.experiment_parameters, dict_parameters_range.keys())]

                # Submit without waiting
                print "Submitting extra job for Plots, parameters:", best_params_resend
                submission_parameters_dict = dict(
                    pbs_submission_infos=pbs_submission_infos_copy, submit_jobs=submit_jobs, wait_jobs_completed=False
                )
                submit_pbs.submit_minibatch_jobswrapper(best_params_resend, submission_parameters_dict)

            except Exception as e:
                print "Failure while submitting sub-task for best parameter. Continuing anyway."
                print parameters
                print e
Example #15
def post_xml():
    "upload jats xml, generate xml, validate, send to lax as a dry run"
    http_ensure('xml' in request.files, "xml file required", 400)

    try:
        override = scraper.deserialize_overrides(
            request.form.getlist('override'))
    except ValueError:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_OVERRIDES,
            'message':
            'an error occurred attempting to parse your given overrides.',
            'trace': sio.getvalue()
        }, 400

    # upload
    try:
        xml = request.files['xml']
        filename = os.path.basename(xml.filename)
        http_ensure(
            os.path.splitext(filename)[1] == '.xml',
            "file doesn't look like xml")
        path = join(upload_folder(), filename)
        xml.save(path)

    except Exception:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_UPLOAD,
            'message':
            'an error occurred uploading the article xml to be processed',
            'trace': sio.getvalue(),
        }, 400  # shouldn't this be a 500? everything is always the client's fault.

    # generate
    try:
        article_json = scraper.main(path, {
            'override': override,
            'fill-missing-image-dimensions': True
        })
        json_filename = filename + '.json'
        json_path = join(upload_folder(), json_filename)
        open(json_path, 'w').write(article_json)

    except Exception as err:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_SCRAPE,
            'message': str(err),
            'trace': sio.getvalue()
        }, 400

    # validate
    try:
        conf.API_PRE_VALIDATE and ajson_validate.main(open(json_path, 'r'))

    except jsonschema.ValidationError as err:
        return {
            'status': conf.INVALID,
            'code': conf.ERROR_INVALID,
            'message':
            'the generated article-json failed validation, see trace for details.',
            'trace': str(err),  # todo: any good?
        }, 400

    except Exception:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_VALIDATING,
            'message':
            'an error occurred attempting to validate the generated article-json',
            'trace': sio.getvalue()
        }, 400  # TODO: shouldn't this be a 500?

    # send to lax
    try:
        #msid, version = utils.version_from_path(filename)
        msid = request.args['id']
        version = request.args['version']
        token = str(uuid.uuid4())
        args = {
            # the *most* important parameter. don't modify lax.
            'dry_run': True,

            # a forced ingest by default
            'action': conf.INGEST,
            'force': True,

            # article details
            'msid': msid,
            'version': int(version),
            'article_json': article_json,
            'token': token,
        }
        lax_resp = adaptor.call_lax(**args)

        context = utils.renkeys(lax_resp, [("message", "lax-message")])
        LOG.info("lax response", extra=context)

        api_resp = utils.subdict(lax_resp,
                                 ['status', 'code', 'message', 'trace'])

        if api_resp['status'] in [conf.INVALID, conf.ERROR]:
            # failure
            return api_resp, 400

        # success
        # 'code', 'message' and 'trace' are not returned by lax on success, just 'status'
        api_resp['ajson'] = json.loads(article_json)['article']
        api_resp['override'] = override
        return api_resp, 200

    except Exception:
        # lax returned something indecipherable
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_COMMUNICATING,
            'message': "lax responded with something that couldn't be decoded",
            'trace': sio.getvalue(),
        }, 400  # TODO: shouldn't this be a 500?
Example #16
    def fn(element, missing):
        missing.append(utils.subdict(element, ['type', 'id', 'uri']))
        element["image"]["uri"] += ".tif"
        return element
Example #18
def _add_post(session, data_dict):

    """
    Add post and related objects from data dict to session
    and flush.

    Do nothing if post with that ID from data dict already exists.
    """

    # Add user.
    if 'from' in data_dict:
        user_data = data_dict['from']
        user, _ = _get_or_create(session, User, fbid=user_data['id'], **user_data)
    else:
        user = User()
    # Add application.
    if 'application' in data_dict:
        app_data = data_dict['application']
        app, _ = _get_or_create(session, Application, fbid=app_data['id'], **app_data)
    else:
        app = Application()
    # Add place.
    if 'place' in data_dict:
        place_data = _clean_place_data(data_dict)
        place, _ = _get_or_create(session, Place, **place_data)
    else:
        place = Place()
    session.flush()

    # Create post.
    post_data = _clean_post_data(data_dict)
    post = Post(
        from_id=user.id,
        application_id=app.id,
        place_id=place.id,
        **post_data)
    session.add(post)
    session.flush()

    # Add post properties.
    if 'properties' in data_dict:
        for property_data in data_dict['properties']:
            property_data = subdict(property_data, Property.__table__.columns.keys())
            session.add(Property(post_id=post.id, **property_data))
    # Add post actions.
    if 'actions' in data_dict:
        for action_data in data_dict['actions']:
            action_data = subdict(action_data, Action.__table__.columns.keys())
            session.add(Action(post_id=post.id, **action_data))
    # Add post metadata.
    if 'metadata' in data_dict:
        if 'connections' in data_dict['metadata']:
            for name, link in data_dict['metadata']['connections'].items():
                session.add(Connection(post_id=post.id, name=name, link=link))
    session.flush()


    # Add post comments.
    if 'comments' in data_dict:
        for comment_data in _get_all(data_dict['id'], 'comments', start_with=data_dict['comments']):
            _add_comments(session, post.id, comment_data)

    # Add post likes.
    if 'likes' in data_dict:
        for like_data in _get_all(data_dict['id'], 'likes', start_with=data_dict['likes']):
            _add_likes(session, post.id, like_data)

    session.flush()
def handler(json_request, outgoing):
    response = partial(send_response, outgoing)

    try:
        request = utils.validate(json_request, conf.REQUEST_SCHEMA)
    except ValueError:
        # bad data. who knows what it was. die
        return response(mkresponse(ERROR, "request could not be parsed: %s" % json_request))

    except ValidationError as err:
        # data is readable, but it's in an unknown/invalid format. die
        return response(mkresponse(ERROR, "request was incorrectly formed: %s" % str(err)))

    except Exception as err:
        # die
        msg = "unhandled error attempting to handle request: %s" % str(err)
        return response(mkresponse(ERROR, msg))

    # we have a valid request :)
    LOG.info("valid request")

    params = subdict(request, ['action', 'id', 'token', 'version', 'force', 'validate-only'])
    params = renkeys(params, [('validate-only', 'dry_run'), ('id', 'msid')])

    # if we're to ingest/publish, then we expect a location to download article data
    if params['action'] in [INGEST, INGEST_PUBLISH]:
        try:
            article_xml = download(request['location'])
            if not article_xml:
                raise ValueError("no article content available")

        except AssertionError as err:
            msg = "refusing to download article xml: %s" % str(err)
            return response(mkresponse(ERROR, msg, request))

        except Exception as err:
            msg = "failed to download article xml from %r: %s" % (request['location'], str(err))
            return response(mkresponse(ERROR, msg, request))

        LOG.info("got xml")

        try:
            article_data = scraper.render_single(article_xml,
                                                 version=params['version'],
                                                 location=request['location'])
            LOG.info("rendered article data ")

        except Exception as err:
            error = str(err) if hasattr(err, 'message') else err
            msg = "failed to render article-json from article-xml: %s" % error
            LOG.exception(msg, extra=params)
            return response(mkresponse(ERROR, msg, request))

        LOG.info("successful scrape")

        try:
            article_json = utils.json_dumps(article_data)
        except ValueError as err:
            msg = "failed to serialize article data to article-json: %s" % str(err)
            return response(mkresponse(ERROR, msg, request))

        LOG.info("successfully serialized article-data to article-json")

        # phew! gauntlet ran, we're now confident of passing this article-json to lax
        # lax may still reject the data as invalid, but we'll proxy that back if necessary
        params['article_json'] = article_json

    try:
        LOG.info("calling lax")

        lax_response = call_lax(**params)

        LOG.info("lax response: %r", lax_response)

        return response(mkresponse(**lax_response))

    except Exception as err:
        # lax didn't understand us or broke
        msg = "lax failed attempting to handle our request: %s" % str(err)
        response(mkresponse(ERROR, msg, request))
        # when lax fails, we fail
        raise
Example #20
def handler(json_request, outgoing):
    response = partial(send_response, outgoing)

    try:
        request = utils.validate(json_request, conf.REQUEST_SCHEMA)
    except ValueError as err:
        # bad data. who knows what it was. die
        return response(
            mkresponse(ERROR,
                       "request could not be parsed: %s" % json_request))

    except ValidationError as err:
        # data is readable, but it's in an unknown/invalid format. die
        return response(
            mkresponse(ERROR, "request was incorrectly formed: %s" % str(err)))

    except Exception as err:
        # die
        msg = "unhandled error attempting to handle request: %s" % str(err)
        return response(mkresponse(ERROR, msg))

    # we have a valid request :)
    LOG.info("valid request")

    params = subdict(
        request,
        ['action', 'id', 'token', 'version', 'force', 'validate-only'])
    params = renkeys(params, [('validate-only', 'dry_run')])

    # if we're to ingest/publish, then we expect a location to download article data
    if params['action'] in [INGEST, INGEST_PUBLISH]:
        try:
            article_xml = download(request['location'])
            if not article_xml:
                raise ValueError("no article content available")

        except AssertionError as err:
            msg = "refusing to download article xml: %s" % str(err)
            return response(mkresponse(ERROR, msg, request))

        except Exception as err:
            msg = "failed to download article xml from %r: %s" % (
                request['location'], str(err))
            return response(mkresponse(ERROR, msg, request))

        LOG.info("got xml")

        try:
            article_data = scraper.render_single(article_xml,
                                                 version=params['version'],
                                                 location=request['location'])
            LOG.info("rendered article data ")

        except Exception as err:
            error = str(err) if hasattr(err, 'message') else err
            msg = "failed to render article-json from article-xml: %s" % error
            LOG.exception(msg, extra=params)
            return response(mkresponse(ERROR, msg, request))

        LOG.info("successful scrape")

        try:
            article_json = utils.json_dumps(article_data)
        except ValueError as err:
            msg = "failed to serialize article data to article-json: %s" % str(
                err)
            return response(mkresponse(ERROR, msg, request))

        LOG.info("successfully serialized article-data to article-json")

        # phew! gauntlet ran, we're now confident of passing this article-json to lax
        # lax may still reject the data as invalid, but we'll proxy that back if necessary
        params['article_json'] = article_json

    try:
        LOG.info("calling lax")  # with params: %r" % params)

        lax_response = call_lax(**params)

        LOG.info("lax response: %r", lax_response)

        return response(mkresponse(**lax_response))

    except Exception as err:
        # lax didn't understand us or broke
        msg = "lax failed attempting to handle our request: %s" % str(err)
        response(mkresponse(ERROR, msg, request))
        # when lax fails, we fail
        raise
Example #21
def import_article_version(article, article_data, create=True, update=False):
    expected_keys = ['title', 'version', 'update', 'pub-date', 'status']
    kwargs = subdict(article_data, expected_keys)

    try:
        doi = article_data['doi']
        version = int(kwargs['version'])
        version_date = kwargs.get('update')
        datetime_published = kwargs['pub-date']

        context = {'article': doi, 'version': version}
        LOG.info("importing ArticleVersion", extra=context)

        if version_date and version == 1:
            # this is so common it's not even worth a debug
            #LOG.warn("inconsistency: a v1 has an 'update' date", extra=context)

            d1, d2 = striptz(version_date), striptz(datetime_published)
            if d1 != d2:
                c = {}
                c.update(context)
                c.update({'pub-date': datetime_published, 'update': version_date})
                LOG.warn("double inconsistency: not only do we have an 'update' date for a v1, it doesn't match the date published", extra=c)

                # 'update' date occurred before publish date ...
                if d1 < d2:
                    LOG.warn("triple inconsistency: not only do we have an 'update' date for a v1 that doesn't match the date published, it was actually updated *before* it was published", extra=c)

        if version == 1:
            version_date = datetime_published

        if not version_date and version > 1:
            LOG.warn("inconsistency: a version > 1 does not have an 'update' date", extra=context)
            if settings.FAIL_ON_NO_UPDATE_DATE:
                msg = "no 'update' date found for ArticleVersion"
                raise ValueError(msg)
            msg = "no 'update' date found for ArticleVersion, using None instead"
            LOG.warn(msg, extra=context)
            version_date = None

        # post process data
        kwargs.update({
            'article': article,
            'version': version,
            'datetime_published': todt(version_date),
            'status': kwargs['status'].lower(),
        })
        delall(kwargs, ['pub-date', 'update'])
    except KeyError:
        LOG.error("expected keys invalid/not present",
                  extra={'expected_keys': expected_keys})
        raise

    try:
        avobj = models.ArticleVersion.objects.get(article=article, version=kwargs['version'])
        if not update:
            msg = "Article with version does exists but update == False"
            LOG.warn(msg, extra=context)
            raise AssertionError(msg)
        LOG.debug("ArticleVersion found, updating")
        for key, val in kwargs.items():
            setattr(avobj, key, val)
        avobj.save()
        LOG.info("updated existing ArticleVersion", extra=context)
        return avobj

    except models.ArticleVersion.DoesNotExist:
        if not create:
            msg = "ArticleVersion with version does not exist and create == False"
            LOG.warn(msg, extra=context)
            raise

    LOG.debug("ArticleVersion NOT found, creating", extra=context)
    avobj = models.ArticleVersion(**kwargs)
    avobj.save()
    LOG.info("created new ArticleVersion", extra=context)
    return avobj
def best_parameters_callback(job, parameters=None):

    try:

        submit_current_parameters = False
        parameters['cnt_parameters_seen'] = parameters['cnt_parameters_seen'] + 1

        if not np.any(np.isnan(job.get_result())):
            if parameters['cnt_parameters_seen'] % parameters['plot_every'] == 0:
                # Let's plot again.
                print "\n\n >>>>> Will run new submission with these parameters: %s, fitness: %f \n\n" % (utils.subdict(job.experiment_parameters, dict_parameters_range.keys()), job.get_result())

                np.save('./outputs/curr_params', dict(parameters=parameters))

                submit_current_parameters = True

            if (np.any(np.isnan(parameters['result'])) or (job.get_result() <= parameters['result'])):
                # New best parameter!
                parameters['result'] = job.get_result()
                parameters['job_name'] = job.job_name
                parameters['parameters'] = job.experiment_parameters
                parameters['best_parameters'] = utils.subdict(job.experiment_parameters, dict_parameters_range.keys())

                print "\n\n>>>>>> Found new best parameters: \n%s %s %s\n\n" % (parameters['best_parameters'], parameters['result'], parameters['job_name'])

                np.save('./outputs/best_params', dict(parameters=parameters))

                submit_current_parameters = True

            # If desired, automatically create additional plots.
            if parameters.get('submit_parameters', False) and submit_current_parameters:

                pbs_submission_infos_copy = parameters['pbs_submission_infos_copy']
                # Will check the best fitting parameters, and relaunch simulations for them, in order to get new cool plots.

                ## First do Memory curves + EM Fits
                pbs_submission_infos_copy['other_options'].update(dict(
                    action_to_do='launcher_do_memory_curve_marginal_fi_withplots_live',
                    subaction='collect_responses',
                    collect_responses=None,
                    inference_method='sample',
                    N=300,
                    num_samples=200,
                    M=100,
                    output_directory=os.path.join(simul_out_dir, 'outputs'),
                    selection_method='last',
                    num_repetitions=3,
                    renormalize_sigmax=None,
                    autoset_parameters=None,
                    burn_samples=100,
                    stimuli_generation='random',
                    stimuli_generation_recall='random',
                    session_id='cmaes_fitting_experiments_relaunchs',
                    result_computation='filenameoutput',
                    label='cmaes_Mratiosigmaxsigmaoutput_fitting_experiment_rerun_290814'))
                pbs_submission_infos_copy['walltime'] = '70:00:00'
                pbs_submission_infos_copy['submit_label'] = 'param_rerun'
                pbs_submission_infos_copy['qos'] = 'auto'

                submit_pbs = submitpbs.SubmitPBS(pbs_submission_infos=pbs_submission_infos_copy, debug=True)

                # Extract the parameters to try
                params_resend = [utils.subdict(job.experiment_parameters, dict_parameters_range.keys())]

                # Submit without waiting
                print "Submitting extra job for Plots, parameters:", params_resend
                submission_parameters_dict = dict(pbs_submission_infos=pbs_submission_infos_copy, submit_jobs=submit_jobs, wait_jobs_completed=False)
                submit_pbs.submit_minibatch_jobswrapper(params_resend, submission_parameters_dict)

    except Exception as e:
        print "Failure while submitting sub-task for current parameters. Continuing anyway."
        print parameters
        print e
Example #23
def post_xml():
    "upload jats xml, generate xml, validate, send to lax as a dry run"
    http_ensure('xml' in request.files, "xml file required", 400)

    try:
        override = scraper.deserialize_overrides(request.form.getlist('override'))
    except ValueError:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_OVERRIDES,
            'message': 'an error occurred attempting to parse your given overrides.',
            'trace': sio.getvalue()
        } # shouldn't this be a 400?

    # upload
    try:
        xml = request.files['xml']
        filename = os.path.basename(xml.filename)
        http_ensure(os.path.splitext(filename)[1] == '.xml', "file doesn't look like xml")
        path = join(upload_folder(), filename)
        xml.save(path)

    except Exception:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_UPLOAD,
            'message': 'an error occurred uploading the article xml to be processed',
            'trace': sio.getvalue(),
        }, 400 # shouldn't this be a 500?

    # generate
    try:
        article_json = scraper.main(path, {
            'override': override,
            'fill-missing-image-dimensions': True
        })
        json_filename = filename + '.json'
        json_path = join(upload_folder(), json_filename)
        open(json_path, 'w').write(article_json)

    except Exception as err:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_SCRAPE,
            'message': str(err),
            'trace': sio.getvalue()
        }, 400

    # validate
    try:
        conf.API_PRE_VALIDATE and ajson_validate.main(open(json_path, 'r'))

    except jsonschema.ValidationError as err:
        return {
            'status': conf.INVALID,
            'code': conf.ERROR_INVALID,
            'message': 'the generated article-json failed validation, see trace for details.',
            'trace': str(err), # todo: any good?
        }, 400

    except Exception:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_VALIDATING,
            'message': 'an error occurred attempting to validate the generated article-json',
            'trace': sio.getvalue()
        }, 400 # TODO: shouldn't this be a 500?

    # send to lax
    try:
        #msid, version = utils.version_from_path(filename)
        msid = request.args['id']
        version = request.args['version']
        token = str(uuid.uuid4())
        args = {
            # the *most* important parameter. don't modify lax.
            'dry_run': True,

            # a forced ingest by default
            'action': conf.INGEST,
            'force': True,

            # article details
            'msid': msid,
            'version': int(version),
            'article_json': article_json,

            'token': token,
        }
        lax_resp = adaptor.call_lax(**args)

        context = utils.renkeys(lax_resp, [("message", "lax-message")])
        LOG.info("lax response", extra=context)

        api_resp = utils.subdict(lax_resp, ['status', 'code', 'message', 'trace'])

        if api_resp['status'] in [conf.INVALID, conf.ERROR]:
            # failure
            return api_resp, 400

        # success
        # 'code', 'message' and 'trace' are not returned by lax on success, just 'status'
        api_resp['ajson'] = json.loads(article_json)['article']
        api_resp['override'] = override
        return api_resp, 200

    except Exception:
        # lax returned something indecipherable
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_COMMUNICATING,
            'message': "lax responded with something that couldn't be decoded",
            'trace': sio.getvalue(),
        }, 400 # TODO: shouldn't this be a 500?