Example #1
0
def copy_results(staging_storage,
                 production_storage,
                 SAMPLE_SIZE=0,
                 results_ids=None):
    """Copy result records from the production storage to the staging storage.

    The records to copy are selected either by explicit id or, when no ids
    are given, by fetching ``get_number_to_copy(total_count, SAMPLE_SIZE)``
    records. The molecules, keywords and KV-store entries (stdout, stderr,
    error) referenced by the selected results are copied first, and every
    result is re-pointed at the ids returned by those copy helpers before
    it is inserted into the staging storage.

    Args:
        staging_storage: Destination storage; ``add_results`` is called on it.
        production_storage: Source storage; ``get_results`` and
            ``get_total_count`` are called on it.
        SAMPLE_SIZE: Forwarded to ``get_number_to_copy`` together with the
            total result count. Only used when ``results_ids`` is empty.
        results_ids: Production ids of the results to copy. Duplicates are
            ignored. ``None`` (the default) is treated like an empty list.

    Returns:
        ``[]`` when there is nothing to select (``SAMPLE_SIZE == 0`` and no
        ids). Otherwise a dict mapping production result id -> staging
        result id. When ids were requested, the keys are the caller's own id
        objects and ids that were not found in production are omitted.
    """

    requested_ids = [] if results_ids is None else list(set(results_ids))

    if SAMPLE_SIZE == 0 and not requested_ids:
        return []

    if requested_ids:
        count_to_copy = len(requested_ids)
        prod_results = production_storage.get_results(id=requested_ids,
                                                      status=None)['data']
    else:
        total_count = production_storage.get_total_count(ResultORM)
        print('------Total # of Results in the DB is: ', total_count)
        count_to_copy = get_number_to_copy(total_count, SAMPLE_SIZE)
        prod_results = production_storage.get_results(
            status=None, limit=count_to_copy)['data']

    print('Copying {} results'.format(count_to_copy))

    # Production ids in the order the rows were actually returned. The set()
    # above scrambles the caller's order and the storage is free to return
    # rows in any order, so the requested list must not be used for pairing.
    # Assumes each row returned by get_results carries its 'id'.
    fetched_ids = [result['id'] for result in prod_results]

    kv_fields = ('stdout', 'stderr', 'error')

    # Collect every referenced object so dependencies are copied in bulk.
    mols, keywords, kvstore = [], [], []
    for result in prod_results:
        if result['molecule']:
            mols.append(result['molecule'])
        if result['keywords']:
            keywords.append(result['keywords'])
        kvstore.extend(result[field] for field in kv_fields if result[field])

    mols_map = copy_molecules(staging_storage, production_storage, mols)
    keywords_map = copy_keywords(staging_storage, production_storage, keywords)
    kvstore_map = copy_kv_store(staging_storage, production_storage, kvstore)

    # Re-point each result at the staging ids of its dependencies.
    for result in prod_results:
        # NOTE(review): the molecule is remapped unconditionally, unlike the
        # optional fields below; a result without one raises KeyError here.
        result['molecule'] = mols_map[result['molecule']]
        if result['keywords']:
            result['keywords'] = keywords_map[result['keywords']]
        for field in kv_fields:
            if result[field]:
                result[field] = kvstore_map[result[field]]

    results_py = [ResultRecord(**res) for res in prod_results]
    staging_ids = staging_storage.add_results(results_py)['data']
    if VERBOSE:
        print('Inserted in SQL:', len(staging_ids))

    print('---- Done copying Results\n\n')

    if not requested_ids:
        return dict(zip(fetched_ids, staging_ids))

    # Ids are matched by their string form so that an id passed as an int
    # still finds a row whose id comes back as a str (or vice versa), while
    # the returned keys stay exactly what the caller passed in.
    staging_by_prod = {
        str(prod_id): staging_id
        for prod_id, staging_id in zip(fetched_ids, staging_ids)
    }
    return {
        rid: staging_by_prod[str(rid)]
        for rid in requested_ids
        if str(rid) in staging_by_prod
    }
    def _get_final_results(self,
                           optimization_ids: List[Union[int, str]] = None):
        """Return the actual results objects of the best result in each optimization"""

        if optimization_ids is None:
            self._raise_missing_attribute('final_result',
                                          'List of optimizations ids')

        sql_statement = text("""
            select * from base_result
            join (
                select opt_id, result.* from result
                join (
                    select opt.opt_id, opt.result_id, max_pos from opt_result_association as opt
                    inner join (
                            select opt_id, max(position) as max_pos from opt_result_association
                            where opt_id in :optimization_ids
                            group by opt_id
                        ) opt2
                    on opt.opt_id = opt2.opt_id and opt.position = opt2.max_pos
                ) traj
                on result.id = traj.result_id
            ) result
            on base_result.id = result.id
        """)

        # bind and expand ids list
        sql_statement = sql_statement.bindparams(
            bindparam("optimization_ids", expanding=True))

        # column types:
        columns = inspect(ResultORM).columns
        sql_statement = sql_statement.columns(opt_id=Integer, *columns)
        query_result = self.execute_query(
            sql_statement, optimization_ids=list(optimization_ids))

        ret = {}
        for rec in query_result:
            self._remove_excluded_keys(rec)
            key = rec.pop('opt_id')
            ret[key] = ResultRecord(**rec)

        return ret
def copy_results(mongo_storage, sql_storage, max_limit, with_check=False):
    """Copy all results from the Mongo storage to the SQL storage in batches.

    Results are read ``max_limit`` at a time. A batch is skipped when its
    last result is already recorded in ``ResultMap``. Otherwise the
    molecule, keywords and stdout/stderr/error references are passed through
    ``get_ids_map``, the records are inserted with ``add_results`` and the
    mongo id -> sql id pairs are saved with ``store_ids_map``.

    Args:
        mongo_storage: Source storage; ``get_total_count`` and
            ``get_results`` are called on it.
        sql_storage: Destination storage; ``add_results``, ``get_results``
            and ``session_scope`` are called on it.
        max_limit: Batch size, used as the ``range`` step and query limit.
        with_check: When true, re-read the first result of each copied batch
            from both storages and assert their ``return_result`` match.

    Raises:
        ValueError: If ``max_limit`` is 0 (zero ``range`` step).
        AssertionError: If ``with_check`` is set and the values differ.
    """

    total_count = mongo_storage.get_total_count(ResultORM)
    print('------Total # of Results in the DB is: ', total_count)

    for skip in range(0, total_count, max_limit):

        print('\nCurrent skip={}\n-----------'.format(skip))
        ret = mongo_storage.get_results(status=None, limit=max_limit, skip=skip)
        mongo_res = ret['data']
        print('mongo results returned: ', len(mongo_res), ', total: ', ret['meta']['n_found'])

        # An empty batch has no last/first element to inspect below; move on
        # instead of failing with IndexError.
        if not mongo_res:
            continue

        # check if this batch has been already stored
        if is_mapped(sql_storage, ResultMap, mongo_res[-1]['id']):
            print('Skipping first ', skip+max_limit)
            continue

        # load mapped ids in memory
        mongo_res = get_ids_map(sql_storage, ['molecule'], MoleculeMap, mongo_res)
        mongo_res = get_ids_map(sql_storage, ['keywords'], KeywordsMap, mongo_res)
        mongo_res = get_ids_map(sql_storage, ['stdout', 'stderr', 'error'], KVStoreMap, mongo_res)

        results_py = [ResultRecord(**res) for res in mongo_res]
        sql_inserted = sql_storage.add_results(results_py)['data']
        print('Inserted in SQL:', len(sql_inserted))

        # store the ids mapping in the sql DB
        mongo_ids = [obj['id'] for obj in mongo_res]
        store_ids_map(sql_storage, mongo_ids, sql_inserted, ResultMap)

        if with_check:
            first_mongo_id = mongo_res[0]['id']

            # read the attribute while the session is still open
            with sql_storage.session_scope() as session:
                mapped_sql_id = session.query(ResultMap).filter_by(
                    mongo_id=first_mongo_id).first().sql_id

            from_sql = sql_storage.get_results(id=[mapped_sql_id])
            print('Get from SQL:', from_sql['data'])

            from_mongo = mongo_storage.get_results(id=[first_mongo_id])
            print('Get from Mongo:', from_mongo['data'])
            assert from_mongo['data'][0]['return_result'] == from_sql['data'][0]['return_result']

    print('---- Done copying Results\n\n')
    def _get_all_results(self, optimization_ids: List[Union[int, str]] = None):
        """Returns all the results objects (trajectory) of each optmization
        Returns list(list) """

        if optimization_ids is None:
            self._raise_missing_attribute('all_results',
                                          'List of optimizations ids')

        # row_to_json(result.*)
        sql_statement = text("""
            select * from base_result
            join (
                select opt_id, result.* from result
                join opt_result_association as traj
                on result.id = traj.result_id
                where traj.opt_id in :optimization_ids
            ) result
            on base_result.id = result.id
        """)

        # bind and expand ids list
        sql_statement = sql_statement.bindparams(
            bindparam("optimization_ids", expanding=True))

        # column types:
        columns = inspect(ResultORM).columns
        sql_statement = sql_statement.columns(opt_id=Integer, *columns)
        query_result = self.execute_query(
            sql_statement, optimization_ids=list(optimization_ids))

        ret = {}
        for rec in query_result:
            self._remove_excluded_keys(rec)
            key = rec.pop('opt_id')
            if key not in ret:
                ret[key] = []

            ret[key].append(ResultRecord(**rec))

        return ret
Example #5
0
def copy_results(staging_storage,
                 production_storage,
                 SAMPLE_SIZE=0,
                 results_ids=None):
    """Copy result records from the production storage to the staging storage.

    The records to copy are selected either by explicit id or, when no ids
    are given, by fetching ``get_number_to_copy(total_count, SAMPLE_SIZE)``
    records. The managers, molecules, keywords and KV-store entries (stdout,
    stderr, error) referenced by the selected results are copied first.
    Every result is then re-pointed at the ids returned by the molecule,
    keyword and KV-store copy helpers and inserted into the staging storage.

    Args:
        staging_storage: Destination storage; ``add_results`` is called on it.
        production_storage: Source storage; ``get_results`` and
            ``get_total_count`` are called on it.
        SAMPLE_SIZE: Forwarded to ``get_number_to_copy`` together with the
            total result count. Only used when ``results_ids`` is empty.
        results_ids: Production ids of the results to copy. Duplicates are
            ignored. ``None`` (the default) is treated like an empty list.

    Returns:
        ``[]`` when there is nothing to select (``SAMPLE_SIZE == 0`` and no
        ids). Otherwise a dict mapping production result id -> staging
        result id. When ids were requested, the keys are the caller's own id
        objects and ids that were not found in production are omitted.
    """

    requested_ids = [] if results_ids is None else list(set(results_ids))

    if SAMPLE_SIZE == 0 and not requested_ids:
        return []

    if requested_ids:
        count_to_copy = len(requested_ids)
        prod_results = production_storage.get_results(id=requested_ids,
                                                      status=None)["data"]
    else:
        total_count = production_storage.get_total_count(ResultORM)
        print("------Total # of Results in the DB is: ", total_count)
        count_to_copy = get_number_to_copy(total_count, SAMPLE_SIZE)
        prod_results = production_storage.get_results(
            status=None, limit=count_to_copy)["data"]

    print("Copying {} results".format(count_to_copy))

    # Production ids in the order the rows were actually returned. The set()
    # above scrambles the caller's order and the storage is free to return
    # rows in any order, so the requested list must not be used for pairing.
    # Assumes each row returned by get_results carries its "id".
    fetched_ids = [result["id"] for result in prod_results]

    kv_fields = ("stdout", "stderr", "error")

    # Collect every referenced object so dependencies are copied in bulk.
    mols, keywords, kvstore, managers = [], [], [], []
    for result in prod_results:
        if result["molecule"]:
            mols.append(result["molecule"])
        if result["keywords"]:
            keywords.append(result["keywords"])
        kvstore.extend(result[field] for field in kv_fields if result[field])
        if result["manager_name"]:
            managers.append(result["manager_name"])

    # Managers are copied only for their side effect; results keep
    # referring to them by name, so no id map is needed.
    copy_managers(staging_storage, production_storage, managers)
    mols_map = copy_molecules(staging_storage, production_storage, mols)
    keywords_map = copy_keywords(staging_storage, production_storage, keywords)
    kvstore_map = copy_kv_store(staging_storage, production_storage, kvstore)

    # Re-point each result at the staging ids of its dependencies.
    for result in prod_results:
        # NOTE(review): the molecule is remapped unconditionally, unlike the
        # optional fields below; a result without one raises KeyError here.
        result["molecule"] = mols_map[result["molecule"]]
        if result["keywords"]:
            result["keywords"] = keywords_map[result["keywords"]]
        for field in kv_fields:
            if result[field]:
                result[field] = kvstore_map[result[field]]

        # "extras" is always dropped; None-valued optional fields are removed
        # so they are not passed explicitly to ResultRecord.
        result.pop("extras")
        if result["protocols"] is None:
            result.pop("protocols")
        if result["manager_name"] is None:
            result.pop("manager_name")

    results_py = [ResultRecord(**res) for res in prod_results]
    staging_ids = staging_storage.add_results(results_py)["data"]
    if VERBOSE:
        print("Inserted in SQL:", len(staging_ids))

    print("---- Done copying Results\n\n")

    if not requested_ids:
        return dict(zip(fetched_ids, staging_ids))

    # Ids are matched by their string form so that an id passed as an int
    # still finds a row whose id comes back as a str (or vice versa), while
    # the returned keys stay exactly what the caller passed in.
    staging_by_prod = {
        str(prod_id): staging_id
        for prod_id, staging_id in zip(fetched_ids, staging_ids)
    }
    return {
        rid: staging_by_prod[str(rid)]
        for rid in requested_ids
        if str(rid) in staging_by_prod
    }