def regenerate(args, update_catalog=False, mongodb=None):
    # Write to a temp dir when building test fixtures; otherwise write into
    # the repo checkout and force a catalog update
    if settings_module.parse_boolean(os.environ.get('MAKETESTS', '0')):
        DESTPATH = os.path.join(tempfile.mkdtemp(), 'experiment_reference.json')
    else:
        DESTPATH = os.path.join(os.getcwd(), 'datacatalog', 'definitions',
                                'jsondocs', 'experiment_reference.json')
        update_catalog = True

    logger.debug('Project config: ' + PARENT + '/config.yml')
    project_settings = config.read_config(places_list=[PARENT])
    logger.debug('Local config: ' + THIS + '/config.yml')
    bootstrap_settings = config.read_config(places_list=[THIS])
    settings = dicthelpers.data_merge(project_settings, bootstrap_settings)

    env = args.environment
    if env is None:
        env = 'development'
    db = settings.get(env)

    schema = {
        'description': 'Experiment reference enumeration',
        'type': 'string',
        'enum': []
    }

    challenges = ChallengeMapping(settings['experiment_reference'],
                                  settings['google_client'])
    challenges.populate()
    for cp in challenges.filescache:
        if cp.get('uri', None) is not None:
            google_sheets_id = os.path.basename(cp.get('uri'))
            cp_uuid = identifiers.typeduuid.catalog_uuid(
                cp.get('id'), 'challenge_problem')
            cp_settings = copy.deepcopy(settings['experiment_reference'])
            cp_settings['google_sheets_id'] = google_sheets_id
            # Generate the experiment designs for each challenge problem
            mapping = ExperimentReferenceMapping(cp_settings,
                                                 settings['google_client'])
            mapping.populate()
            if update_catalog:
                if mongodb is None:
                    mongodb = db['mongodb']
                store = linkedstores.experiment_design.ExperimentDesignStore(
                    mongodb)
                for doc in mapping.filescache:
                    if doc['experiment_design_id'] != 'Unknown':
                        doc['child_of'].append(cp_uuid)
                        logger.info('SYNCING {}'.format(doc.get('title', None)))
                        store.add_update_document(doc)
            for rec in mapping.filescache:
                if rec['experiment_design_id'] not in schema['enum']:
                    schema['enum'].append(rec['experiment_design_id'])

    # Use a context manager rather than leaking the handle via
    # json.dump(..., open(DESTPATH, 'w'), ...)
    with open(DESTPATH, 'w') as schemafile:
        json.dump(schema, schemafile, indent=2)
    return True
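# The generated document is a plain JSON Schema string enumeration, so it can
# be sanity-checked with the jsonschema package. A minimal sketch, assuming
# the file written by regenerate() above; the experiment id is hypothetical.
import json

from jsonschema import ValidationError, validate

with open('datacatalog/definitions/jsondocs/experiment_reference.json') as f:
    schema = json.load(f)

try:
    # Values present in schema['enum'] pass; anything else raises
    validate(instance='Novel-Chassis-NAND-Gate', schema=schema)
except ValidationError as exc:
    print('Not a known experiment reference:', exc.message)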
def main(args):
    logger.debug('Project config: ' + PARENT + '/config.yml')
    project_settings = config.read_config(places_list=[PARENT])
    logger.debug('Local config: ' + THIS + '/config.yml')
    bootstrap_settings = config.read_config(places_list=[THIS])
    settings = dicthelpers.data_merge(project_settings, bootstrap_settings)

    env = args.environment
    if env is None:
        env = 'localhost'
    settings['verbose'] = args.verbose is True

    mongodb = settings.get(env).get('mongodb')

    if args.command == 'list':
        dblist(mongodb, settings)
    elif args.command == 'auto':
        autobuild(mongodb, settings)
    elif args.command == 'create':
        raise NotImplementedError()
    elif args.command == 'delete':
        raise NotImplementedError()
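# These entry points assume the merged settings expose one block per
# environment. A sketch of the assumed shape, inferred from the keys the
# code reads (host, port, username, password, root_password, database);
# every value below is hypothetical.
EXAMPLE_SETTINGS = {
    'localhost': {
        'mongodb': {
            'host': '127.0.0.1',
            'port': 27017,
            'username': 'catalog_user',
            'password': 'catalog_pass',
            'root_password': 'root_pass',
            'database': 'catalog_staging'
        }
    },
    'development': {
        'mongodb': {}  # same shape, pointed at the shared dev endpoint
    }
}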
def main(args):
    logger.debug('Project config: ' + PARENT + '/config.yml')
    project_settings = config.read_config(places_list=[PARENT])
    logger.debug('Local config: ' + THIS + '/config.yml')
    bootstrap_settings = config.read_config(places_list=[THIS])
    settings = dicthelpers.data_merge(project_settings, bootstrap_settings)

    env = args.environment
    if env is None:
        env = 'localhost'
    settings['verbose'] = args.verbose is True

    mongodb = settings.get(env).get('mongodb')
    mongodb_uri = mongo.get_mongo_uri(mongodb)
    logger.debug('URI: {}'.format(mongodb_uri))

    # A database name given on the command line overrides the configured one
    if args.database is not None:
        database_name = args.database
    else:
        database_name = settings.get(env).get('mongodb', {}).get('database', None)
    logger.debug('DB: {}'.format(database_name))

    myclient = MongoClient(mongodb_uri)
    idb = myclient[database_name]

    if args.command == 'discover':
        autodiscover(idb, settings)
    elif args.command == 'auto':
        autobuild(idb, settings)
    elif args.command == 'create':
        raise NotImplementedError()
    elif args.command == 'delete':
        raise NotImplementedError()
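# Neither main() builds its own parser, so the args namespace must provide
# environment, database, verbose, and command. A minimal sketch of
# compatible wiring; the flag names are assumptions, not the project's CLI.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Manage catalog databases')
    parser.add_argument('command',
                        choices=['discover', 'auto', 'create', 'delete'])
    parser.add_argument('-e', '--environment', default=None,
                        help='config environment (defaults to localhost)')
    parser.add_argument('-d', '--database', default=None,
                        help='override the configured database name')
    parser.add_argument('-v', '--verbose', action='store_true')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())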
def regenerate(args, update_catalog=False, mongodb=None):
    if settings_module.parse_boolean(os.environ.get('MAKETESTS', '0')):
        DESTPATH = os.path.join(tempfile.mkdtemp(), 'challenge_problem_id.json')
    else:
        DESTPATH = os.path.join(os.getcwd(), 'datacatalog', 'definitions',
                                'jsondocs', 'challenge_problem_id.json')
        update_catalog = True

    logger.debug('Project config: ' + PARENT + '/config.yml')
    project_settings = config.read_config(places_list=[PARENT])
    logger.debug('Local config: ' + THIS + '/config.yml')
    bootstrap_settings = config.read_config(places_list=[THIS])
    settings = dicthelpers.data_merge(project_settings, bootstrap_settings)

    env = args.environment
    if env is None:
        env = 'development'
    db = settings.get(env)

    mapping = ChallengeMapping(settings['experiment_reference'],
                               settings['google_client'])
    mapping.populate()

    # Experiment records: insert into the experiment_reference collection
    # FIXME - We don't know which challenge_problem they are children of
    # The mapping is already populated; don't populate a second time just
    # to chain into generate_schema_definitions()
    schemadef = mapping.generate_schema_definitions()
    with open(DESTPATH, 'w') as schemafile:
        json.dump(schemadef, schemafile, indent=2)

    if update_catalog:
        if mongodb is None:
            mongodb = db['mongodb']
        store = linkedstores.challenge_problem.ChallengeStore(mongodb)
        for doc in mapping.filescache:
            logger.info('SYNCING {}'.format(doc.get('title', None)))
            store.add_update_document(doc)
    return True
def main(args):
    logger.debug('Project config: ' + PARENT + '/config.yml')
    project_settings = config.read_config(places_list=[PARENT])
    logger.debug('Local config: ' + THIS + '/config.yml')
    bootstrap_settings = config.read_config(places_list=[THIS])
    settings = dicthelpers.data_merge(project_settings, bootstrap_settings)

    env = args.environment
    if env is None:
        env = 'localhost'
    settings['verbose'] = args.verbose is True

    mongodb = settings.get(env).get('mongodb')
    # Connect as the administrative account (username redacted in the source)
    mongodb_root = {
        'host': mongodb['host'],
        'port': mongodb['port'],
        'username': '******',
        'password': mongodb['root_password']
    }
    mongodb_uri = mongo.get_mongo_uri(mongodb_root)
    logger.debug('MongoDB: {}'.format(mongodb_uri))
    myclient = MongoClient(mongodb_uri)

    database_name = mongodb.get('database', args.database)
    if database_name is not None:
        logger.info('Ensuring existence of {}'.format(database_name))
        # MongoDB creates databases lazily; writing a record forces creation
        myclient[database_name]['_keep'].insert_one(
            {'note': 'database provisioned'})
        roles = [{'role': 'dbOwner', 'db': database_name}]
        # Create the service account against admin, falling back to an
        # update if it already exists
        try:
            myclient['admin'].command("createUser",
                                      mongodb['username'],
                                      pwd=mongodb['password'],
                                      roles=roles)
        except OperationFailure:
            myclient['admin'].command("updateUser",
                                      mongodb['username'],
                                      pwd=mongodb['password'],
                                      roles=roles)
        except Exception as opf:
            logger.warning(opf)
        # Repeat against the target database for deployments that
        # authenticate there rather than against admin
        try:
            myclient[database_name].command("createUser",
                                            mongodb['username'],
                                            pwd=mongodb['password'],
                                            roles=roles)
        except OperationFailure:
            myclient[database_name].command("updateUser",
                                            mongodb['username'],
                                            pwd=mongodb['password'],
                                            roles=roles)
        except Exception as opf:
            logger.warning(opf)
    else:
        raise Exception(
            'Failed to find database name in config or command line options')
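# mongo.get_mongo_uri is opaque here. A sketch of an equivalent, assuming
# standard mongodb:// URIs and the credential escaping pymongo requires;
# the body is an assumption, not the datacatalog implementation.
from urllib.parse import quote_plus

def get_mongo_uri_sketch(conn):
    # Percent-escape credentials per RFC 3986 before embedding in the URI
    return 'mongodb://{}:{}@{}:{}/'.format(
        quote_plus(conn['username']), quote_plus(conn['password']),
        conn['host'], conn['port'])

# get_mongo_uri_sketch({'host': 'db.local', 'port': 27017,
#                       'username': 'admin', 'password': 'p@ss'})
# -> 'mongodb://admin:p%40ss@db.local:27017/'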
ENVIRONMENT = os.environ.get('DB_ENV', 'localhost')
HERE = os.getcwd()
SELF = __file__
THIS = os.path.dirname(SELF)
PARENT = os.path.dirname(THIS)
GPARENT = os.path.dirname(PARENT)

# Use the local checkout of datacatalog, not an installed copy
if HERE not in sys.path:
    sys.path.insert(0, HERE)
from datacatalog.identifiers import abaco
from datacatalog import dicthelpers

project_settings = config.read_config(places_list=[PARENT])
bootstrap_settings = config.read_config(places_list=[THIS])
settings = dicthelpers.data_merge(project_settings, bootstrap_settings)
settings = AttrDict({
    'mongodb': settings.get(ENVIRONMENT, {}).get('mongodb'),
    'pipelines': {
        'pipeline_uuid': '106c46ff-8186-5756-a934-071f4497b58d',
        'pipeline_manager_id': abaco.actorid.mock(),
        'pipeline_manager_nonce': abaco.nonceid.mock(),
        'job_manager_id': abaco.actorid.mock(),
        'job_manager_nonce': abaco.nonceid.mock(),
        'job_indexer_id': abaco.actorid.mock(),
        'job_indexer_nonce': abaco.nonceid.mock()
    }
})
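# dicthelpers.data_merge layers the local config over the project one.
# A sketch of the assumed semantics (recursive merge, second argument
# wins); the real helper may differ in edge cases.
import copy

def data_merge_sketch(base, overlay):
    merged = copy.deepcopy(base) if isinstance(base, dict) else {}
    for key, value in (overlay or {}).items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = data_merge_sketch(merged[key], value)
        else:
            merged[key] = copy.deepcopy(value)
    return merged

# data_merge_sketch({'a': {'x': 1}}, {'a': {'y': 2}})
# -> {'a': {'x': 1, 'y': 2}}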
def main(args):
    logger.setLevel(logging.DEBUG)

    def get_v1_items(filter={}):
        """Returns a cursor of v1 items"""
        return v1_stores['pipelinejob'].find(filter=filter)

    def get_v2_items(filter={}):
        """Returns a cursor of v2 items"""
        return v2_stores['pipelinejob'].coll.find(filter=filter)

    settings = config.read_config()
    mongodb_v2 = settings.get('mongodb')
    mongodb_v1 = copy.copy(mongodb_v2)
    # Make overridable
    mongodb_v1['database'] = 'catalog'

    db1 = datacatalog.mongo.db_connection(mongodb_v1)
    v1_stores = dict()
    v1_stores['pipeline'] = db1['pipelines']
    v1_stores['pipelinejob'] = db1['jobs']

    v2_stores = dict()
    v2_stores['pipeline'] = datacatalog.linkedstores.pipeline.PipelineStore(mongodb_v2)
    v2_stores['pipelinejob'] = datacatalog.linkedstores.pipelinejob.PipelineJobStore(mongodb_v2)

    jobs = get_v1_items()
    jc = 0
    logger.info('Jobs found: %s', jobs.count())
    for job in jobs:
        job_doc = dict()
        jc = jc + 1
        logger.debug('Processing job %s', jc)

        # Lift over UUIDs, skipping any record that cannot be translated.
        # Capture the old values before the try blocks so the log calls in
        # the handlers never hit an unbound name.
        ouuid = str(job.get('uuid'))
        try:
            nuuid = typeduuid.catalog_uuid_from_v1_uuid(
                ouuid, uuid_type='pipelinejob')
        except Exception:
            logger.critical('Unable to translate %s. Skipping.', ouuid)
            continue
        opuuid = str(job.get('pipeline_uuid'))
        try:
            npuuid = typeduuid.catalog_uuid_from_v1_uuid(
                opuuid, uuid_type='pipeline')
        except Exception:
            logger.critical('Unable to translate %s. Skipping.', opuuid)
            continue
        logger.info('UUID %s remapped to %s', ouuid, nuuid)

        # Don't overwrite previously migrated jobs
        if v2_stores['pipelinejob'].coll.find_one({'uuid': nuuid}) is not None:
            logger.critical('Destination job exists. Skipping.')
            continue

        job_doc['uuid'] = nuuid
        job_doc['archive_path'] = os.path.join('/', job['path'])
        job_doc['archive_system'] = 'data-sd2e-community'
        job_doc['session'] = job.get('session',
                                     interestinganimal.generate(timestamp=False))
        job_doc['updated'] = job.get('updated')
        job_doc['state'] = job.get('status', 'CREATED')
        job_doc['last_event'] = job.get('last_event', 'update').lower()
        job_doc['pipeline_uuid'] = npuuid

        # Linkages
        job_doc['generated_by'] = [npuuid]
        job_doc['child_of'] = list()
        job_doc['derived_from'] = list()

        # Agent/task
        if 'actor_id' in job:
            job_doc['agent'] = ('https://api.sd2e.org/actors/v2/'
                                + job.get('actor_id'))
        else:
            job_doc['agent'] = 'https://api.sd2e.org/actors/v2/MEzqaw4rkWZoK'
        job_doc['task'] = None

        # Lift over top-level data
        old_data = job.get('data', dict())
        new_data = dict()
        # Copy once, before the loop, so every popped key stays dropped
        # rather than being restored on the next iteration
        old_data_filtered = copy.deepcopy(old_data)
        # Lift over parameters and establish derived_from linkages
        for oldkey, newkey, uuid_type in [
                ('sample_id', 'sample_id', 'sample'),
                ('experiment_reference', 'experiment_design_id', 'experiment'),
                ('measurement_id', 'measurement_id', 'measurement')]:
            if oldkey in old_data:
                new_data[newkey] = old_data[oldkey]
                old_data_filtered.pop(oldkey)
                value_uuid = typeduuid.catalog_uuid(
                    old_data[oldkey], uuid_type=uuid_type)
                job_doc['derived_from'].append(value_uuid)

        # Merge lifted data and other data fields
        new_data = data_merge(old_data_filtered, new_data)
        if new_data is None:
            new_data = dict()
        job_doc['data'] = new_data

        # Port job history: v1 events are single-key dicts keyed by event name
        v2_history = list()
        for v1_event in job.get('history', []):
            v2_name = list(v1_event.keys())[0]
            v2_event = {'date': v1_event.get(v2_name).get('date'),
                        'data': v1_event.get(v2_name, {}).get('data', dict()),
                        'name': v2_name.lower(),
                        'uuid': typeduuid.generate(
                            uuid_type='pipelinejob_event', binary=False)}
            if v2_event['data'] is None:
                v2_event['data'] = dict()
            v2_history.append(v2_event)
        v2_history = sorted(v2_history, key=lambda k: k['date'])
        job_doc['history'] = v2_history

        # Set system-managed keys
        job_doc = v2_stores['pipelinejob'].set_private_keys(
            job_doc, source=SELF)
        if args.verbose:
            pprint(job_doc)

        resp = v2_stores['pipelinejob'].coll.insert_one(job_doc)
        logger.debug('Inserted document {}'.format(resp.inserted_id))
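# The history port is the subtlest step: v1 stores each event as a
# single-key dict keyed by the event name, while v2 wants flat, lower-cased
# records sorted by date. A standalone sketch with uuid.uuid4() standing in
# for typeduuid.generate and hypothetical sample data.
import uuid

v1_history = [
    {'FINISHED': {'date': '2018-11-02T09:15:00Z', 'data': {'rc': 0}}},
    {'CREATED': {'date': '2018-11-01T12:00:00Z', 'data': None}},
]

v2_history = []
for v1_event in v1_history:
    name = list(v1_event.keys())[0]
    v2_history.append({
        'date': v1_event[name]['date'],
        'data': v1_event[name].get('data') or {},  # normalize None to {}
        'name': name.lower(),
        'uuid': str(uuid.uuid4()),  # stand-in for typeduuid.generate(...)
    })

v2_history.sort(key=lambda event: event['date'])
# -> the 'created' event first, then 'finished'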