def best_parameters_callback(job, parameters=None):
    if not np.any(np.isnan(job.get_result())) and (np.any(np.isnan(parameters['result'])) or (job.get_result() <= parameters['result'])):
        # New best parameter!
        parameters['result'] = job.get_result()
        parameters['job_name'] = job.job_name
        parameters['parameters'] = job.experiment_parameters
        parameters['best_parameters'] = utils.subdict(job.experiment_parameters, dict_parameters_range.keys())

        print "\n\n>>>>>> Found new best parameters: \n%s %s %s\n\n" % (parameters['best_parameters'], parameters['result'], parameters['job_name'])

        np.save('./outputs/best_params', dict(parameters=parameters))

        # If desired, automatically create additional plots.
        if parameters.get('submit_best', False):
            pbs_submission_infos_copy = parameters['pbs_submission_infos_copy']

            try:
                # Will check the best fitting parameters, and relaunch simulations for them, in order to get new cool plots.
                curr_params_label = '_'.join(["%s%.2f" % (k.replace('_', ''), v) for k, v in parameters['best_parameters'].iteritems()])

                ## First do Memory curves + EM Fits
                pbs_submission_infos_copy['other_options'].update(dict(
                    action_to_do='launcher_do_memory_curve_marginal_fi_withplots_live',
                    subaction='collect_responses',
                    inference_method='sample',
                    N=300,
                    T=6,
                    num_samples=300,
                    output_directory=os.path.join(simul_out_dir, 'outputs'),
                    selection_method='last',
                    num_repetitions=10,
                    burn_samples=200,
                    stimuli_generation='random',
                    stimuli_generation_recall='random',
                    session_id='cmaes_bays09_7try_rerun_080816',
                    result_computation='filenameoutput',
                    label='%s_cmaes_bays09_7try_080816' % (curr_params_label)
                ))
                pbs_submission_infos_copy['walltime'] = '40:00:00'
                pbs_submission_infos_copy['submit_label'] = 'bestparam_rerun'

                submit_pbs = submitpbs.SubmitPBS(pbs_submission_infos=pbs_submission_infos_copy, debug=True)

                # Extract the parameters to try
                best_params_resend = [utils.subdict(job.experiment_parameters, dict_parameters_range.keys())]

                # Submit without waiting
                print "Submitting extra job for Plots, parameters:", best_params_resend
                submission_parameters_dict = dict(pbs_submission_infos=pbs_submission_infos_copy, submit_jobs=submit_jobs, wait_jobs_completed=False)
                submit_pbs.submit_minibatch_jobswrapper(best_params_resend, submission_parameters_dict)
            except Exception as e:
                print "Failure while submitting sub-task for best parameter. Continuing anyway."
                print parameters
                print e

def import_article(journal, article_data, create=True, update=False):
    if not article_data or not isinstance(article_data, dict):
        raise ValueError("given data to import is empty/invalid")
    expected_keys = ['doi', 'volume', 'path', 'article-type', 'manuscript_id']

    # data wrangling
    try:
        kwargs = subdict(article_data, expected_keys)

        # JATS XML doesn't contain the manuscript ID. derive it from doi
        if 'manuscript_id' not in kwargs and 'doi' in kwargs:
            kwargs['manuscript_id'] = doi2msid(kwargs['doi'])
        elif 'doi' not in kwargs and 'manuscript_id' in kwargs:
            kwargs['doi'] = msid2doi(kwargs['manuscript_id'])

        context = {'article': kwargs['doi']}
        LOG.info("importing Article", extra=context)

        # post process data
        kwargs.update({
            'journal': journal,
            'volume': int(kwargs['volume']),
            'type': kwargs['article-type'],
        })
        delall(kwargs, ['path', 'article-type'])
    except KeyError:
        raise ValueError("expected keys invalid/not present: %s" % ", ".join(expected_keys))

    # attempt to insert
    article_key = subdict(kwargs, ['doi', 'version'])
    try:
        article_obj = models.Article.objects.get(**article_key)
        avobj = import_article_version(article_obj, article_data, create, update)
        LOG.info("Article exists, updating", extra=context)
        for key, val in kwargs.items():
            setattr(article_obj, key, val)
        article_obj.save()
        return article_obj, avobj
    except models.Article.DoesNotExist:
        # we've been told not to create new articles.
        # this is now a legitimate exception
        if not create:
            raise
        article_obj = models.Article(**kwargs)
        article_obj.save()
        avobj = import_article_version(article_obj, article_data, create, update)
        LOG.info("created new Article %s" % article_obj)
        return article_obj, avobj

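# NOTE (added for context, not part of the original sources): import_article above and
# several snippets below lean on two small dict helpers from a shared `utils` module,
# `subdict` and `delall`. Their real implementations aren't included here and differ a
# little between projects (some call sites pass a positional key list, others keyword
# `keys=`/`exclude=`; some appear to tolerate missing keys, others to raise KeyError).
# The following is only a permissive sketch inferred from the call sites.
def subdict(d, keys=None, exclude=()):
    """Return a new dict with only `keys` of `d`, skipping missing keys and `exclude`."""
    keys = list(d.keys()) if keys is None else keys
    return dict((k, d[k]) for k in keys if k in d and k not in exclude)

def delall(d, keys):
    """Remove `keys` from `d` in place, ignoring keys that aren't present."""
    for k in keys:
        d.pop(k, None)
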
def expand_videos(msid, video):
    gc_data = metadata(msid)  # cached on first hit
    gc_id_str = ", ".join(gc_data.keys())

    v_id = video['id']
    ensure(v_id in gc_data, "glencoe doesn't know %r, only %r" % (v_id, gc_id_str))

    video_data = gc_data[v_id]
    video_data = utils.subdict(video_data, ['jpg_href', 'width', 'height'])
    video_data = utils.renkeys(video_data, [('jpg_href', 'image')])

    func = lambda mtype: {
        'mediaType': SOURCES[mtype],
        'uri': gc_data[v_id][mtype + "_href"]
    }
    video_data['sources'] = lmap(func, SOURCES)

    video.update(video_data)

    del video['uri']  # returned by elife-tools, not part of spec

    # Add placeholder, the video thumbnail image
    video["placeholder"] = {}
    video["placeholder"]["uri"] = video["image"].split('/')[-1]
    video["placeholder"]["alt"] = ""

    return video

def expand_videos(msid, video):
    gc_data = metadata(msid)  # cached on first hit
    gc_id_str = ", ".join(gc_data.keys())

    v_id = video['id']
    ensure(gc_data, "glencoe doesn't know %r, it doesn't have any media" % msid)
    ensure(v_id in gc_data, "glencoe doesn't know %r, only %r" % (v_id, gc_id_str))

    video_data = gc_data[v_id]
    video_data = utils.subdict(video_data, ['jpg_href', 'width', 'height'])
    video_data = utils.renkeys(video_data, [('jpg_href', 'image')])

    func = lambda mtype: OrderedDict([
        ('mediaType', SOURCES[mtype]),
        ('uri', gc_data[v_id][mtype + "_href"])
    ])
    video_data['sources'] = lmap(func, SOURCES)

    video.update(video_data)

    del video['uri']  # returned by elife-tools, not part of spec

    # Add placeholder, the video thumbnail image
    video["placeholder"] = OrderedDict()
    video["placeholder"]["uri"] = video["image"].split('/')[-1]
    video["placeholder"]["alt"] = ""

    return video

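# NOTE (added for context, not part of the original sources): both expand_videos
# variants above assume `ensure` and `lmap` helpers. These are plausible stand-ins
# inferred from usage only: `ensure` asserts a condition with a message, `lmap` is
# an eagerly evaluated `map`.
def ensure(assertion, msg):
    """Raise AssertionError with `msg` when `assertion` is falsy."""
    if not assertion:
        raise AssertionError(msg)

def lmap(func, *iterables):
    """`map` that always returns a list (Python 3's map is lazy)."""
    return list(map(func, *iterables))
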
def mkresponse(status, message, request={}, **kwargs):
    packet = {
        "status": status,
        "message": message,
        "id": None,
        "token": None,
        "datetime": datetime.now(),
    }

    request = subdict(request, ['id', 'token', 'action'])
    packet.update(request)

    # merge in any explicit overrides
    packet.update(kwargs)

    # more response wrangling
    packet = renkeys(packet, [("action", "requested-action"), ("dry-run", "validate-only")])

    # remove any keys not supported in the schema
    supported_keys = conf.RESPONSE_SCHEMA['properties'].keys()
    packet = subdict(packet, supported_keys)

    # wrangle log context
    context = renkeys(packet, [("message", "status-message")])
    levels = {
        INVALID: logging.ERROR,
        ERROR: logging.ERROR,
        VALIDATED: logging.INFO,
        INGESTED: logging.DEBUG,
        PUBLISHED: logging.DEBUG
    }
    LOG.log(levels[packet["status"]], "%s response", packet['status'], extra=context)

    # success messages are None
    if not packet['message']:
        del packet['message']

    # double-publications are successful
    if kwargs.get('code') == 'already-published':
        packet['status'] = PUBLISHED

    return packet

def mkresponse(status, message, request={}, **kwargs):
    packet = {
        "status": status,
        "message": message,
        "id": None,
        "token": None,
        "datetime": datetime.now(),
    }

    request = subdict(request, ['id', 'token', 'action'])
    packet.update(request)

    # merge in any explicit overrides
    packet.update(kwargs)

    # more response wrangling
    packet = renkeys(packet, [
        ("action", "requested-action"),
        ("dry-run", "validate-only")
    ])

    # remove any keys not supported in the schema
    supported_keys = conf.RESPONSE_SCHEMA['properties'].keys()
    packet = subdict(packet, supported_keys)

    # wrangle log context
    context = renkeys(packet, [("message", "status-message")])
    levels = {
        INVALID: logging.ERROR,
        ERROR: logging.ERROR,
        VALIDATED: logging.INFO,
        INGESTED: logging.DEBUG,
        PUBLISHED: logging.DEBUG
    }
    LOG.log(levels[packet["status"]], "%s response", packet['status'], extra=context)

    # success messages are None
    if not packet['message']:
        del packet['message']

    # double-publications are successful
    if kwargs.get('code') == 'already-published':
        packet['status'] = PUBLISHED

    return packet

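# NOTE (added for context, not part of the original sources): `renkeys` ("rename keys")
# is another shared helper assumed by mkresponse and the lax handlers below. Its
# behaviour is inferred from the call sites (a dict plus (old, new) pairs); this sketch
# is an assumption, not the library code.
def renkeys(d, pairs):
    """Return a copy of `d` with each (old, new) key renamed when present."""
    d = dict(d)
    for old, new in pairs:
        if old in d:
            d[new] = d.pop(old)
    return d
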
def _get_or_create(session, model, fbid, **kwargs):
    """Fetch a `model` row by its Facebook id, creating it from `kwargs` if missing.

    Returns (instance, created).
    """
    instance = session.query(model).filter_by(fbid=fbid).first()
    created = False
    if not instance:
        created = True
        data = subdict(kwargs, keys=model.__table__.columns.keys(), exclude=('id',))
        instance = model(fbid=fbid, **data)
        session.add(instance)
    return instance, created

def patch2(patch_data):
    Model = getattr(models, patch_data['model'])
    key_list = patch_data['-key']
    val_list = subdict(patch_data, key_list)
    try:
        utils.create_or_update(Model, patch_data, key_list, create=False, update=True)
        LOG.info("successfully patched %s", val_list)
        return True
    except Model.DoesNotExist:
        LOG.warn("%s not found, skipping patch", patch_data['model'])
        return False
    except Exception as err:
        LOG.error("unhandled exception attempting to patch %s: %s", val_list, err)

def best_parameters_callback(job, parameters=None):
    if not np.any(np.isnan(job.get_result())) and (np.any(np.isnan(parameters['result'])) or (job.get_result() <= parameters['result'])):
        # New best parameter!
        parameters['result'] = job.get_result()
        parameters['job_name'] = job.job_name
        parameters['parameters'] = job.experiment_parameters
        parameters['best_parameters'] = utils.subdict(job.experiment_parameters, dict_parameters_range.keys())

        print "\n\n>>>>>> Found new best parameters: \n%s %s %s\n\n" % (parameters['best_parameters'], parameters['result'], parameters['job_name'])

        np.save('./outputs/best_params', dict(parameters=parameters))

def phi(self, x, y):
    """Joint feature vector phi(x, y), computed by the MiniZinc model and
    memoized on the frozen (x, y) pair."""
    _frx = freeze(x), freeze(y)
    if _frx not in self._phis:
        ykeys = ['x', 'y', 'dx', 'dy']
        _phi = pymzn.minizinc(
            self.mzn_phi, output_vars=['phi'],
            data={
                **self.inputize(subdict(y, ykeys), ykeys),
                **x
            },
            solver=pymzn.opturion)[0]['phi']
        self._phis[_frx] = np.array(_phi, dtype=np.float64)
    return self._phis[_frx]

def _add_comments(session, post_id, comment_data):
    # Get or create commenter.
    commenter_data = comment_data['from']
    commenter, created = _get_or_create(
        session, User, fbid=commenter_data['id'])
    if created:
        commenter.name = commenter_data['name']
        session.flush()

    # Initialize comment to save it further.
    comment_fbid = comment_data['id']
    comment_data = subdict(
        comment_data, keys=Comment.__table__.columns.keys(), exclude=('id',))
    comment_data['created_time'] = convert_date(comment_data['created_time'])
    session.add(Comment(from_id=commenter.id, fbid=comment_fbid,
                        post_id=post_id, **comment_data))

def _clean_post_data(data_dict):
    """
    Extract post data from data dict.
    """
    data = subdict(data_dict, keys=Post.__table__.columns.keys())
    data['fbid'] = data['id']
    del data['id']
    if 'shares' in data:
        data['shares'] = data['shares']['count']
    data['created_time'] = convert_date(data['created_time'])
    data['updated_time'] = convert_date(data['updated_time'])
    if 'privacy' in data_dict:
        privacy_data = data_dict['privacy']
        if 'value' in privacy_data:
            data['privacy_value'] = privacy_data['value']
        if 'allow' in privacy_data:
            data['allow'] = privacy_data['allow']
        if 'deny' in privacy_data:
            data['deny'] = privacy_data['deny']
    return data

def _tracks(files):
    """Returns a sorted list of tracks given Archive.org item
    (concert) metadata files
    """
    def sort_tracks(tracks):
        for i in range(len(tracks)):
            try:
                tracks[i]['track'] = int(tracks[i].get('track', "1")
                                         .split("/")[0])
            except:
                tracks[i]['track'] = 1
        return sorted(tracks, key=lambda t: t['track'])

    def get_filetype(files):
        available = set(f.get('name', '').lower()
                        .rsplit('.', 1)[-1] for f in files)
        return next(ft if ft in available else False
                    for ft in FILETYPE_PRIORITY)

    ts = []
    filetype = get_filetype(files)
    if not filetype:
        return {}  # better error handling required

    for f in files:
        try:
            track = subdict(f, REQUIRED_KEYS)
        except KeyError as e:
            continue  # Skip if track doesn't have required keys
        if track['name'].endswith(filetype):
            ts.append(track)
    try:
        return sort_tracks(ts)
    except ValueError as e:
        print(e)
        return ts

def best_parameters_callback(job, parameters=None):
    if not np.any(np.isnan(job.get_result())) and (
        np.any(np.isnan(parameters["result"])) or (job.get_result() <= parameters["result"])
    ):
        # New best parameter!
        parameters["result"] = job.get_result()
        parameters["job_name"] = job.job_name
        parameters["parameters"] = job.experiment_parameters
        parameters["best_parameters"] = utils.subdict(job.experiment_parameters, dict_parameters_range.keys())

        print "\n\n>>>>>> Found new best parameters: \n%s %s %s\n\n" % (
            parameters["best_parameters"],
            parameters["result"],
            parameters["job_name"],
        )

        np.save("./outputs/best_params", dict(parameters=parameters))

        # If desired, automatically create additional plots.
        if parameters.get("submit_best", False):
            pbs_submission_infos_copy = parameters["pbs_submission_infos_copy"]

            try:
                # Will check the best fitting parameters, and relaunch simulations for them, in order to get new cool plots.

                ## First do Memory curves + EM Fits
                pbs_submission_infos_copy["other_options"].update(
                    dict(
                        action_to_do="launcher_do_memory_curve_marginal_fi_withplots_live",
                        subaction="collect_responses",
                        inference_method="sample",
                        N=300,
                        num_samples=300,
                        M=M,
                        output_directory=os.path.join(simul_out_dir, "outputs"),
                        selection_method="last",
                        num_repetitions=3,
                        burn_samples=200,
                        stimuli_generation="random",
                        stimuli_generation_recall="random",
                        session_id="cmaes_fitting_experiments_relaunchs",
                        result_computation="filenameoutput",
                        label="cmaes_ratiosigmaxsigmaoutput_fitting_experiment_rerun_280314",
                    )
                )
                pbs_submission_infos_copy["walltime"] = "80:00:00"
                pbs_submission_infos_copy["submit_label"] = "bestparam_rerun"

                submit_pbs = submitpbs.SubmitPBS(pbs_submission_infos=pbs_submission_infos_copy, debug=True)

                # Extract the parameters to try
                best_params_resend = [utils.subdict(job.experiment_parameters, dict_parameters_range.keys())]

                # Submit without waiting
                print "Submitting extra job for Plots, parameters:", best_params_resend
                submission_parameters_dict = dict(
                    pbs_submission_infos=pbs_submission_infos_copy, submit_jobs=submit_jobs, wait_jobs_completed=False
                )
                submit_pbs.submit_minibatch_jobswrapper(best_params_resend, submission_parameters_dict)
            except Exception as e:
                print "Failure while submitting sub-task for best parameter. Continuing anyway."
                print parameters
                print e

def post_xml():
    "upload jats xml, generate xml, validate, send to lax as a dry run"
    http_ensure('xml' in request.files, "xml file required", 400)
    try:
        override = scraper.deserialize_overrides(
            request.form.getlist('override'))
    except ValueError:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_OVERRIDES,
            'message': 'an error occurred attempting to parse your given overrides.',
            'trace': sio.getvalue()
        }, 400

    # upload
    try:
        xml = request.files['xml']
        filename = os.path.basename(xml.filename)
        http_ensure(
            os.path.splitext(filename)[1] == '.xml',
            "file doesn't look like xml")
        path = join(upload_folder(), filename)
        xml.save(path)
    except Exception:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_UPLOAD,
            'message': 'an error occured uploading the article xml to be processed',
            'trace': sio.getvalue(),
        }, 400  # shouldn't this be a 500? everything is always the client's fault.

    # generate
    try:
        article_json = scraper.main(path, {
            'override': override,
            'fill-missing-image-dimensions': True
        })
        json_filename = filename + '.json'
        json_path = join(upload_folder(), json_filename)
        open(json_path, 'w').write(article_json)
    except Exception as err:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_SCRAPE,
            'message': str(err),
            'trace': sio.getvalue()
        }, 400

    # validate
    try:
        conf.API_PRE_VALIDATE and ajson_validate.main(open(json_path, 'r'))
    except jsonschema.ValidationError as err:
        return {
            'status': conf.INVALID,
            'code': conf.ERROR_INVALID,
            'message': 'the generated article-json failed validation, see trace for details.',
            'trace': str(err),  # todo: any good?
        }, 400
    except Exception:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_VALIDATING,
            'message': 'an error occurred attempting to validate the generated article-json',
            'trace': sio.getvalue()
        }, 400  # TODO: shouldn't this be a 500?

    # send to lax
    try:
        #msid, version = utils.version_from_path(filename)
        msid = request.args['id']
        version = request.args['version']
        token = str(uuid.uuid4())
        args = {
            # the *most* important parameter. don't modify lax.
            'dry_run': True,

            # a forced ingest by default
            'action': conf.INGEST,
            'force': True,

            # article details
            'msid': msid,
            'version': int(version),
            'article_json': article_json,

            'token': token,
        }
        lax_resp = adaptor.call_lax(**args)

        context = utils.renkeys(lax_resp, [("message", "lax-message")])
        LOG.info("lax response", extra=context)

        api_resp = utils.subdict(lax_resp, ['status', 'code', 'message', 'trace'])

        if api_resp['status'] in [conf.INVALID, conf.ERROR]:
            # failure
            return api_resp, 400

        # success
        # 'code', 'message' and 'trace' are not returned by lax on success, just 'status'
        api_resp['ajson'] = json.loads(article_json)['article']
        api_resp['override'] = override
        return api_resp, 200
    except Exception:
        # lax returned something indecipherable
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_COMMUNICATING,
            'message': "lax responded with something that couldn't be decoded",
            'trace': sio.getvalue(),
        }, 400  # TODO: shouldn't this be a 500?

def fn(element, missing):
    missing.append(utils.subdict(element, ['type', 'id', 'uri']))
    element["image"]["uri"] += ".tif"
    return element

def _add_post(session, data_dict):
    """
    Add post and related objects from data dict to session and flush.

    Do nothing if post with that ID from data dict already exists.
    """
    # Add user.
    if 'from' in data_dict:
        user_data = data_dict['from']
        user, _ = _get_or_create(session, User, fbid=user_data['id'],
                                 **user_data)
    else:
        user = User()

    # Add application.
    if 'application' in data_dict:
        app_data = data_dict['application']
        app, _ = _get_or_create(session, Application, fbid=app_data['id'],
                                **app_data)
    else:
        app = Application()

    # Add place.
    if 'place' in data_dict:
        place_data = _clean_place_data(data_dict)
        place, _ = _get_or_create(session, Place, **place_data)
    else:
        place = Place()

    session.flush()

    # Create post.
    post_data = _clean_post_data(data_dict)
    post = Post(from_id=user.id, application_id=app.id, place_id=place.id,
                **post_data)
    session.add(post)
    session.flush()

    # Add post properties.
    if 'properties' in data_dict:
        for property_data in data_dict['properties']:
            property_data = subdict(property_data,
                                    Property.__table__.columns.keys())
            session.add(Property(post_id=post.id, **property_data))

    # Add post actions.
    if 'actions' in data_dict:
        for action_data in data_dict['actions']:
            action_data = subdict(action_data,
                                  Action.__table__.columns.keys())
            session.add(Action(post_id=post.id, **action_data))

    # Add post metadata.
    if 'metadata' in data_dict:
        if 'connections' in data_dict['metadata']:
            for name, link in data_dict['metadata']['connections'].items():
                session.add(Connection(post_id=post.id, name=name, link=link))
    session.flush()

    # Add post comments.
    if 'comments' in data_dict:
        for comment_data in _get_all(data_dict['id'], 'comments',
                                     start_with=data_dict['comments']):
            _add_comments(session, post.id, comment_data)

    # Add post likes.
    if 'likes' in data_dict:
        for like_data in _get_all(data_dict['id'], 'likes',
                                  start_with=data_dict['likes']):
            _add_likes(session, post.id, like_data)
    session.flush()

def handler(json_request, outgoing):
    response = partial(send_response, outgoing)
    try:
        request = utils.validate(json_request, conf.REQUEST_SCHEMA)
    except ValueError:
        # bad data. who knows what it was. die
        return response(mkresponse(ERROR, "request could not be parsed: %s" % json_request))
    except ValidationError as err:
        # data is readable, but it's in an unknown/invalid format. die
        return response(mkresponse(ERROR, "request was incorrectly formed: %s" % str(err)))
    except Exception as err:
        # die
        msg = "unhandled error attempting to handle request: %s" % str(err)
        return response(mkresponse(ERROR, msg))

    # we have a valid request :)
    LOG.info("valid request")

    params = subdict(request, ['action', 'id', 'token', 'version', 'force', 'validate-only'])
    params = renkeys(params, [('validate-only', 'dry_run'), ('id', 'msid')])

    # if we're to ingest/publish, then we expect a location to download article data
    if params['action'] in [INGEST, INGEST_PUBLISH]:
        try:
            article_xml = download(request['location'])
            if not article_xml:
                raise ValueError("no article content available")
        except AssertionError as err:
            msg = "refusing to download article xml: %s" % str(err)
            return response(mkresponse(ERROR, msg, request))
        except Exception as err:
            msg = "failed to download article xml from %r: %s" % (request['location'], str(err))
            return response(mkresponse(ERROR, msg, request))

        LOG.info("got xml")

        try:
            article_data = scraper.render_single(article_xml, version=params['version'],
                                                 location=request['location'])
            LOG.info("rendered article data ")
        except Exception as err:
            error = str(err) if hasattr(err, 'message') else err
            msg = "failed to render article-json from article-xml: %s" % error
            LOG.exception(msg, extra=params)
            return response(mkresponse(ERROR, msg, request))

        LOG.info("successful scrape")

        try:
            article_json = utils.json_dumps(article_data)
        except ValueError as err:
            msg = "failed to serialize article data to article-json: %s" % str(err)
            return response(mkresponse(ERROR, msg, request))

        LOG.info("successfully serialized article-data to article-json")

        # phew! gauntlet ran, we're now confident of passing this article-json to lax
        # lax may still reject the data as invalid, but we'll proxy that back if necessary
        params['article_json'] = article_json

    try:
        LOG.info("calling lax")
        lax_response = call_lax(**params)
        LOG.info("lax response: %r", lax_response)
        return response(mkresponse(**lax_response))
    except Exception as err:
        # lax didn't understand us or broke
        msg = "lax failed attempting to handle our request: %s" % str(err)
        response(mkresponse(ERROR, msg, request))
        # when lax fails, we fail
        raise

def handler(json_request, outgoing):
    response = partial(send_response, outgoing)
    try:
        request = utils.validate(json_request, conf.REQUEST_SCHEMA)
    except ValueError as err:
        # bad data. who knows what it was. die
        return response(
            mkresponse(ERROR, "request could not be parsed: %s" % json_request))
    except ValidationError as err:
        # data is readable, but it's in an unknown/invalid format. die
        return response(
            mkresponse(ERROR, "request was incorrectly formed: %s" % str(err)))
    except Exception as err:
        # die
        msg = "unhandled error attempting to handle request: %s" % str(err)
        return response(mkresponse(ERROR, msg))

    # we have a valid request :)
    LOG.info("valid request")

    params = subdict(
        request, ['action', 'id', 'token', 'version', 'force', 'validate-only'])
    params = renkeys(params, [('validate-only', 'dry_run')])

    # if we're to ingest/publish, then we expect a location to download article data
    if params['action'] in [INGEST, INGEST_PUBLISH]:
        try:
            article_xml = download(request['location'])
            if not article_xml:
                raise ValueError("no article content available")
        except AssertionError as err:
            msg = "refusing to download article xml: %s" % str(err)
            return response(mkresponse(ERROR, msg, request))
        except Exception as err:
            msg = "failed to download article xml from %r: %s" % (
                request['location'], str(err))
            return response(mkresponse(ERROR, msg, request))

        LOG.info("got xml")

        try:
            article_data = scraper.render_single(article_xml,
                                                 version=params['version'],
                                                 location=request['location'])
            LOG.info("rendered article data ")
        except Exception as err:
            error = str(err) if hasattr(err, 'message') else err
            msg = "failed to render article-json from article-xml: %s" % error
            LOG.exception(msg, extra=params)
            return response(mkresponse(ERROR, msg, request))

        LOG.info("successful scrape")

        try:
            article_json = utils.json_dumps(article_data)
        except ValueError as err:
            msg = "failed to serialize article data to article-json: %s" % str(err)
            return response(mkresponse(ERROR, msg, request))

        LOG.info("successfully serialized article-data to article-json")

        # phew! gauntlet ran, we're now confident of passing this article-json to lax
        # lax may still reject the data as invalid, but we'll proxy that back if necessary
        params['article_json'] = article_json

    try:
        LOG.info("calling lax")  # with params: %r" % params)
        lax_response = call_lax(**params)
        LOG.info("lax response: %r", lax_response)
        return response(mkresponse(**lax_response))
    except Exception as err:
        # lax didn't understand us or broke
        msg = "lax failed attempting to handle our request: %s" % str(err)
        response(mkresponse(ERROR, msg, request))
        # when lax fails, we fail
        raise

def import_article_version(article, article_data, create=True, update=False):
    expected_keys = ['title', 'version', 'update', 'pub-date', 'status']
    kwargs = subdict(article_data, expected_keys)

    try:
        doi = article_data['doi']
        version = int(kwargs['version'])
        version_date = kwargs.get('update')
        datetime_published = kwargs['pub-date']

        context = {'article': doi, 'version': version}
        LOG.info("importing ArticleVersion", extra=context)

        if version_date and version == 1:
            # this is so common it's not even worth a debug
            #LOG.warn("inconsistency: a v1 has an 'update' date", extra=context)
            d1, d2 = striptz(version_date), striptz(datetime_published)
            if d1 != d2:
                c = {}
                c.update(context)
                c.update({'pub-date': datetime_published, 'update': version_date})
                LOG.warn("double inconsistency: not only do we have an 'update' date for a v1, it doesn't match the date published", extra=c)
                # 'update' date occurred before publish date ...
                if d1 < d2:
                    LOG.warn("triple inconsistency: not only do we have an 'update' date for a v1 that doesn't match the date published, it was actually updated *before* it was published", extra=c)

        if version == 1:
            version_date = datetime_published

        if not version_date and version > 1:
            LOG.warn("inconsistency: a version > 1 does not have an 'update' date", extra=context)
            if settings.FAIL_ON_NO_UPDATE_DATE:
                msg = "no 'update' date found for ArticleVersion"
                raise ValueError(msg)
            msg = "no 'update' date found for ArticleVersion, using None instead"
            LOG.warn(msg, extra=context)
            version_date = None

        # post process data
        kwargs.update({
            'article': article,
            'version': version,
            'datetime_published': todt(version_date),
            'status': kwargs['status'].lower(),
        })
        delall(kwargs, ['pub-date', 'update'])
    except KeyError:
        LOG.error("expected keys invalid/not present", extra={'expected_keys': expected_keys})
        raise

    try:
        avobj = models.ArticleVersion.objects.get(article=article, version=kwargs['version'])
        if not update:
            msg = "Article with version does exists but update == False"
            LOG.warn(msg, extra=context)
            raise AssertionError(msg)
        LOG.debug("ArticleVersion found, updating")
        for key, val in kwargs.items():
            setattr(avobj, key, val)
        avobj.save()
        LOG.info("updated existing ArticleVersion", extra=context)
        return avobj
    except models.ArticleVersion.DoesNotExist:
        if not create:
            msg = "ArticleVersion with version does not exist and create == False"
            LOG.warn(msg, extra=context)
            raise
        LOG.debug("ArticleVersion NOT found, creating", extra=context)
        avobj = models.ArticleVersion(**kwargs)
        avobj.save()
        LOG.info("created new ArticleVersion", extra=context)
        return avobj

def best_parameters_callback(job, parameters=None):
    try:
        submit_current_parameters = False

        parameters['cnt_parameters_seen'] = parameters['cnt_parameters_seen'] + 1

        if not np.any(np.isnan(job.get_result())):
            if parameters['cnt_parameters_seen'] % parameters['plot_every'] == 0:
                # Let's plot again.
                print "\n\n >>>>> Will run new submission with these parameters: %s, fitness: %f \n\n" % (utils.subdict(job.experiment_parameters, dict_parameters_range.keys()), job.get_result())

                np.save('./outputs/curr_params', dict(parameters=parameters))

                submit_current_parameters = True

            if (np.any(np.isnan(parameters['result'])) or (job.get_result() <= parameters['result'])):
                # New best parameter!
                parameters['result'] = job.get_result()
                parameters['job_name'] = job.job_name
                parameters['parameters'] = job.experiment_parameters
                parameters['best_parameters'] = utils.subdict(job.experiment_parameters, dict_parameters_range.keys())

                print "\n\n>>>>>> Found new best parameters: \n%s %s %s\n\n" % (parameters['best_parameters'], parameters['result'], parameters['job_name'])

                np.save('./outputs/best_params', dict(parameters=parameters))

                submit_current_parameters = True

        # If desired, automatically create additional plots.
        if parameters.get('submit_parameters', False) and submit_current_parameters:
            pbs_submission_infos_copy = parameters['pbs_submission_infos_copy']

            # Will check the best fitting parameters, and relaunch simulations for them, in order to get new cool plots.

            ## First do Memory curves + EM Fits
            pbs_submission_infos_copy['other_options'].update(dict(
                action_to_do='launcher_do_memory_curve_marginal_fi_withplots_live',
                subaction='collect_responses',
                collect_responses=None,
                inference_method='sample',
                N=300,
                num_samples=200,
                M=100,
                output_directory=os.path.join(simul_out_dir, 'outputs'),
                selection_method='last',
                num_repetitions=3,
                renormalize_sigmax=None,
                autoset_parameters=None,
                burn_samples=100,
                stimuli_generation='random',
                stimuli_generation_recall='random',
                session_id='cmaes_fitting_experiments_relaunchs',
                result_computation='filenameoutput',
                label='cmaes_Mratiosigmaxsigmaoutput_fitting_experiment_rerun_290814'))
            pbs_submission_infos_copy['walltime'] = '70:00:00'
            pbs_submission_infos_copy['submit_label'] = 'param_rerun'
            pbs_submission_infos_copy['qos'] = 'auto'

            submit_pbs = submitpbs.SubmitPBS(pbs_submission_infos=pbs_submission_infos_copy, debug=True)

            # Extract the parameters to try
            params_resend = [utils.subdict(job.experiment_parameters, dict_parameters_range.keys())]

            # Submit without waiting
            print "Submitting extra job for Plots, parameters:", params_resend
            submission_parameters_dict = dict(pbs_submission_infos=pbs_submission_infos_copy, submit_jobs=submit_jobs, wait_jobs_completed=False)
            submit_pbs.submit_minibatch_jobswrapper(params_resend, submission_parameters_dict)
    except Exception as e:
        print "Failure while submitting sub-task for current parameters. Continuing anyway."
        print parameters
        print e

def post_xml():
    "upload jats xml, generate xml, validate, send to lax as a dry run"
    http_ensure('xml' in request.files, "xml file required", 400)
    try:
        override = scraper.deserialize_overrides(request.form.getlist('override'))
    except ValueError:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_OVERRIDES,
            'message': 'an error occurred attempting to parse your given overrides.',
            'trace': sio.getvalue()
        }  # shouldn't this be a 400?

    # upload
    try:
        xml = request.files['xml']
        filename = os.path.basename(xml.filename)
        http_ensure(os.path.splitext(filename)[1] == '.xml', "file doesn't look like xml")
        path = join(upload_folder(), filename)
        xml.save(path)
    except Exception:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_UPLOAD,
            'message': 'an error occured uploading the article xml to be processed',
            'trace': sio.getvalue(),
        }, 400  # shouldn't this be a 500?

    # generate
    try:
        article_json = scraper.main(path, {
            'override': override,
            'fill-missing-image-dimensions': True
        })
        json_filename = filename + '.json'
        json_path = join(upload_folder(), json_filename)
        open(json_path, 'w').write(article_json)
    except Exception as err:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.BAD_SCRAPE,
            'message': str(err),
            'trace': sio.getvalue()
        }, 400

    # validate
    try:
        conf.API_PRE_VALIDATE and ajson_validate.main(open(json_path, 'r'))
    except jsonschema.ValidationError as err:
        return {
            'status': conf.INVALID,
            'code': conf.ERROR_INVALID,
            'message': 'the generated article-json failed validation, see trace for details.',
            'trace': str(err),  # todo: any good?
        }, 400
    except Exception:
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_VALIDATING,
            'message': 'an error occurred attempting to validate the generated article-json',
            'trace': sio.getvalue()
        }, 400  # TODO: shouldn't this be a 500?

    # send to lax
    try:
        #msid, version = utils.version_from_path(filename)
        msid = request.args['id']
        version = request.args['version']
        token = str(uuid.uuid4())
        args = {
            # the *most* important parameter. don't modify lax.
            'dry_run': True,

            # a forced ingest by default
            'action': conf.INGEST,
            'force': True,

            # article details
            'msid': msid,
            'version': int(version),
            'article_json': article_json,

            'token': token,
        }
        lax_resp = adaptor.call_lax(**args)

        context = utils.renkeys(lax_resp, [("message", "lax-message")])
        LOG.info("lax response", extra=context)

        api_resp = utils.subdict(lax_resp, ['status', 'code', 'message', 'trace'])

        if api_resp['status'] in [conf.INVALID, conf.ERROR]:
            # failure
            return api_resp, 400

        # success
        # 'code', 'message' and 'trace' are not returned by lax on success, just 'status'
        api_resp['ajson'] = json.loads(article_json)['article']
        api_resp['override'] = override
        return api_resp, 200
    except Exception:
        # lax returned something indecipherable
        sio = StringIO()
        traceback.print_exc(file=sio)
        return {
            'status': conf.ERROR,
            'code': conf.ERROR_COMMUNICATING,
            'message': "lax responded with something that couldn't be decoded",
            'trace': sio.getvalue(),
        }, 400  # TODO: shouldn't this be a 500?