def copy_results(staging_storage, production_storage, SAMPLE_SIZE=0, results_ids=None):
    """Copy result records from the production storage into the staging storage.

    Either the explicitly requested ``results_ids`` are copied or, when none
    are given, a sample whose size is derived from ``SAMPLE_SIZE`` through
    ``get_number_to_copy``. The molecules, keywords and KV-store entries
    referenced by the results are copied first, and the references inside each
    result are rewritten with the id maps returned by those copy helpers.

    Args:
        staging_storage: Destination storage; records are written with
            ``add_results``.
        production_storage: Source storage; read with ``get_results`` and
            ``get_total_count``.
        SAMPLE_SIZE: Forwarded to ``get_number_to_copy`` when no ids are
            requested.
        results_ids: Ids of the production results to copy. ``None`` is
            treated as "no ids"; duplicates are ignored.

    Returns:
        ``[]`` when there is nothing to do (``SAMPLE_SIZE == 0`` and no ids;
        kept as a list for backward compatibility). Otherwise a dict mapping
        the production id of every copied result to its new staging id.
    """
    if results_ids is None:
        results_ids = []
    # De-duplicate while keeping the caller's order (deterministic, unlike set()).
    requested_ids = list(dict.fromkeys(results_ids))

    if SAMPLE_SIZE == 0 and not requested_ids:
        return []

    if requested_ids:
        count_to_copy = len(requested_ids)
        prod_results = production_storage.get_results(id=requested_ids, status=None)['data']
    else:
        total_count = production_storage.get_total_count(ResultORM)
        print('------Total # of Results in the DB is: ', total_count)
        count_to_copy = get_number_to_copy(total_count, SAMPLE_SIZE)
        prod_results = production_storage.get_results(
            status=None, limit=count_to_copy)['data']

    print('Copying {} results'.format(count_to_copy))

    # The records are inserted in the order they were fetched, which is not
    # necessarily the order of the requested ids (and some ids may be missing).
    # Key the returned mapping on each record's own id instead of zipping
    # against the request list. Where the id was requested explicitly, report
    # it in the form the caller supplied so lookups with those ids still hit.
    caller_form = {str(rid): rid for rid in requested_ids}
    source_ids = [
        caller_form.get(str(result['id']), result['id']) for result in prod_results
    ]

    # Collect every referenced object that has to exist in staging first.
    mols, keywords, kvstore = [], [], []
    for result in prod_results:
        if result['molecule']:
            mols.append(result['molecule'])
        if result['keywords']:
            keywords.append(result['keywords'])
        kvstore.extend(
            result[field] for field in ('stdout', 'stderr', 'error') if result[field]
        )

    mols_map = copy_molecules(staging_storage, production_storage, mols)
    keywords_map = copy_keywords(staging_storage, production_storage, keywords)
    kvstore_map = copy_kv_store(staging_storage, production_storage, kvstore)

    # Rewrite the references; empty references are left untouched, mirroring
    # the guards used while collecting them above.
    remaps = (
        ('molecule', mols_map),
        ('keywords', keywords_map),
        ('stdout', kvstore_map),
        ('stderr', kvstore_map),
        ('error', kvstore_map),
    )
    for result in prod_results:
        for field, id_map in remaps:
            if result[field]:
                result[field] = id_map[result[field]]

    results_py = [ResultRecord(**res) for res in prod_results]
    staging_ids = staging_storage.add_results(results_py)['data']
    if VERBOSE:
        print('Inserted in SQL:', len(staging_ids))

    print('---- Done copying Results\n\n')

    return dict(zip(source_ids, staging_ids))
def _get_final_results(self, optimization_ids: List[Union[int, str]] = None):
    """Fetch the last trajectory result of every requested optimization.

    For each optimization id, the row of ``opt_result_association`` with the
    highest ``position`` is selected and joined with ``result`` and
    ``base_result``.

    Returns a dict mapping each ``opt_id`` found to a ``ResultRecord``.
    """
    if optimization_ids is None:
        # Presumably raises; the ids are mandatory for this query.
        self._raise_missing_attribute('final_result', 'List of optimizations ids')

    final_results_sql = text(
        " select * from base_result"
        " join ("
        " select opt_id, result.* from result"
        " join ("
        " select opt.opt_id, opt.result_id, max_pos"
        " from opt_result_association as opt"
        " inner join ("
        " select opt_id, max(position) as max_pos"
        " from opt_result_association"
        " where opt_id in :optimization_ids"
        " group by opt_id"
        " ) opt2"
        " on opt.opt_id = opt2.opt_id and opt.position = opt2.max_pos"
        " ) traj"
        " on result.id = traj.result_id"
        " ) result"
        " on base_result.id = result.id "
    )

    # The id list is bound as an expanding parameter so it can feed the IN clause.
    ids_param = bindparam("optimization_ids", expanding=True)
    final_results_sql = final_results_sql.bindparams(ids_param)

    # Declare the column types: the ResultORM columns plus the extra opt_id.
    orm_columns = inspect(ResultORM).columns
    final_results_sql = final_results_sql.columns(*orm_columns, opt_id=Integer)

    rows = self.execute_query(
        final_results_sql, optimization_ids=list(optimization_ids))

    final_by_opt = {}
    for row in rows:
        self._remove_excluded_keys(row)
        # opt_id is not a ResultRecord field; take it out to use as the key.
        opt_id = row.pop('opt_id')
        final_by_opt[opt_id] = ResultRecord(**row)

    return final_by_opt
def copy_results(mongo_storage, sql_storage, max_limit, with_check=False):
    """Copy all result records from the Mongo storage to the SQL storage in batches.

    The source is read ``max_limit`` records at a time. A batch whose last
    record is already present in ``ResultMap`` is considered copied and is
    skipped. Otherwise the molecule / keywords / KV-store references of the
    batch are translated with ``get_ids_map``, the records are inserted with
    ``add_results`` and the mongo-id -> sql-id pairs are stored in ``ResultMap``.

    Args:
        mongo_storage: Source storage (``get_total_count`` / ``get_results``).
        sql_storage: Destination storage (``add_results`` / ``session_scope``).
        max_limit: Batch size; also the step used for ``skip``.
        with_check: When true, the first record of every copied batch is read
            back from both storages and their ``return_result`` compared.

    Raises:
        AssertionError: If ``with_check`` is set and the read-back
            ``return_result`` values differ.
    """
    total_count = mongo_storage.get_total_count(ResultORM)
    print('------Total # of Results in the DB is: ', total_count)

    for skip in range(0, total_count, max_limit):
        print('\nCurrent skip={}\n-----------'.format(skip))

        ret = mongo_storage.get_results(status=None, limit=max_limit, skip=skip)
        mongo_res = ret['data']
        print('mongo results returned: ', len(mongo_res), ', total: ', ret['meta']['n_found'])

        if not mongo_res:
            # An empty batch has no last record to probe (mongo_res[-1] would
            # raise IndexError) and nothing to insert.
            print('No results returned for skip={}, nothing to copy'.format(skip))
            continue

        # check if this batch has been already stored
        if is_mapped(sql_storage, ResultMap, mongo_res[-1]['id']):
            print('Skipping first ', skip+max_limit)
            continue

        # load mapped ids in memory
        mongo_res = get_ids_map(sql_storage, ['molecule'], MoleculeMap, mongo_res)
        mongo_res = get_ids_map(sql_storage, ['keywords'], KeywordsMap, mongo_res)
        mongo_res = get_ids_map(sql_storage, ['stdout', 'stderr', 'error'], KVStoreMap, mongo_res)

        results_py = [ResultRecord(**res) for res in mongo_res]
        sql_inserted = sql_storage.add_results(results_py)['data']
        print('Inserted in SQL:', len(sql_inserted))

        # store the ids mapping in the sql DB
        mongo_ids = [obj['id'] for obj in mongo_res]
        store_ids_map(sql_storage, mongo_ids, sql_inserted, ResultMap)

        if with_check:
            first_mongo_id = mongo_res[0]['id']
            with sql_storage.session_scope() as session:
                res = session.query(ResultMap).filter_by(mongo_id=first_mongo_id).first().sql_id

            ret = sql_storage.get_results(id=[res])
            print('Get from SQL:', ret['data'])

            ret2 = mongo_storage.get_results(id=[first_mongo_id])
            print('Get from Mongo:', ret2['data'])

            # Raised explicitly (rather than via `assert`) so the check still
            # runs when Python is started with -O.
            if ret2['data'][0]['return_result'] != ret['data'][0]['return_result']:
                raise AssertionError(
                    'return_result mismatch between Mongo and SQL for mongo id {}'.format(
                        first_mongo_id))

    print('---- Done copying Results\n\n')
def _get_all_results(self, optimization_ids: List[Union[int, str]] = None):
    """Collect every trajectory result of the requested optimizations.

    Joins ``result`` with ``opt_result_association`` (and ``base_result``) for
    the given optimization ids and groups the rows by ``opt_id``. The query
    has no ORDER BY, so the order inside each list is whatever the database
    returns.

    Returns a dict mapping each ``opt_id`` found to a list of ``ResultRecord``.
    """
    if optimization_ids is None:
        # Presumably raises; the ids are mandatory for this query.
        self._raise_missing_attribute('all_results', 'List of optimizations ids')

    trajectory_sql = text(
        " select * from base_result"
        " join ("
        " select opt_id, result.* from result"
        " join opt_result_association as traj"
        " on result.id = traj.result_id"
        " where traj.opt_id in :optimization_ids"
        " ) result"
        " on base_result.id = result.id "
    )

    # The id list is bound as an expanding parameter so it can feed the IN clause.
    ids_param = bindparam("optimization_ids", expanding=True)
    trajectory_sql = trajectory_sql.bindparams(ids_param)

    # Declare the column types: the ResultORM columns plus the extra opt_id.
    orm_columns = inspect(ResultORM).columns
    trajectory_sql = trajectory_sql.columns(*orm_columns, opt_id=Integer)

    rows = self.execute_query(
        trajectory_sql, optimization_ids=list(optimization_ids))

    grouped = {}
    for row in rows:
        self._remove_excluded_keys(row)
        # opt_id is not a ResultRecord field; take it out to use as the key.
        opt_id = row.pop('opt_id')
        grouped.setdefault(opt_id, []).append(ResultRecord(**row))

    return grouped
def copy_results(staging_storage, production_storage, SAMPLE_SIZE=0, results_ids=None):
    """Copy result records from the production storage into the staging storage.

    Either the explicitly requested ``results_ids`` are copied or, when none
    are given, a sample whose size is derived from ``SAMPLE_SIZE`` through
    ``get_number_to_copy``. The managers, molecules, keywords and KV-store
    entries referenced by the results are copied first, and the molecule,
    keywords and KV-store references inside each result are rewritten with the
    id maps returned by the copy helpers.

    Args:
        staging_storage: Destination storage; records are written with
            ``add_results``.
        production_storage: Source storage; read with ``get_results`` and
            ``get_total_count``.
        SAMPLE_SIZE: Forwarded to ``get_number_to_copy`` when no ids are
            requested.
        results_ids: Ids of the production results to copy. ``None`` is
            treated as "no ids"; duplicates are ignored.

    Returns:
        ``[]`` when there is nothing to do (``SAMPLE_SIZE == 0`` and no ids;
        kept as a list for backward compatibility). Otherwise a dict mapping
        the production id of every copied result to its new staging id.
    """
    if results_ids is None:
        results_ids = []
    # De-duplicate while keeping the caller's order (deterministic, unlike set()).
    requested_ids = list(dict.fromkeys(results_ids))

    if SAMPLE_SIZE == 0 and not requested_ids:
        return []

    if requested_ids:
        count_to_copy = len(requested_ids)
        prod_results = production_storage.get_results(id=requested_ids, status=None)["data"]
    else:
        total_count = production_storage.get_total_count(ResultORM)
        print("------Total # of Results in the DB is: ", total_count)
        count_to_copy = get_number_to_copy(total_count, SAMPLE_SIZE)
        prod_results = production_storage.get_results(
            status=None, limit=count_to_copy)["data"]

    print("Copying {} results".format(count_to_copy))

    # The records are inserted in the order they were fetched, which is not
    # necessarily the order of the requested ids (and some ids may be missing).
    # Key the returned mapping on each record's own id instead of zipping
    # against the request list. Where the id was requested explicitly, report
    # it in the form the caller supplied so lookups with those ids still hit.
    caller_form = {str(rid): rid for rid in requested_ids}
    source_ids = [
        caller_form.get(str(result["id"]), result["id"]) for result in prod_results
    ]

    # Collect every referenced object that has to exist in staging first.
    mols, keywords, kvstore, managers = [], [], [], []
    for result in prod_results:
        if result["molecule"]:
            mols.append(result["molecule"])
        if result["keywords"]:
            keywords.append(result["keywords"])
        kvstore.extend(
            result[field] for field in ("stdout", "stderr", "error") if result[field]
        )
        if result["manager_name"]:
            managers.append(result["manager_name"])

    copy_managers(staging_storage, production_storage, managers)
    mols_map = copy_molecules(staging_storage, production_storage, mols)
    keywords_map = copy_keywords(staging_storage, production_storage, keywords)
    kvstore_map = copy_kv_store(staging_storage, production_storage, kvstore)

    # Rewrite the references; empty references are left untouched, mirroring
    # the guards used while collecting them above.
    remaps = (
        ("molecule", mols_map),
        ("keywords", keywords_map),
        ("stdout", kvstore_map),
        ("stderr", kvstore_map),
        ("error", kvstore_map),
    )
    for result in prod_results:
        for field, id_map in remaps:
            if result[field]:
                result[field] = id_map[result[field]]

        # "extras" is never forwarded to ResultRecord; the two optional fields
        # are dropped only when they are unset. A missing key is tolerated.
        result.pop("extras", None)
        for optional_field in ("protocols", "manager_name"):
            if result.get(optional_field) is None:
                result.pop(optional_field, None)

    results_py = [ResultRecord(**res) for res in prod_results]
    staging_ids = staging_storage.add_results(results_py)["data"]
    if VERBOSE:
        print("Inserted in SQL:", len(staging_ids))

    print("---- Done copying Results\n\n")

    return dict(zip(source_ids, staging_ids))