def _resolve_off_sample_ds_ids(db, ds_ids_str, sql_where, fix_missing):
    """Return the dataset ids selected by the first filter that is set.

    Filters are checked in order: ``ds_ids_str``, ``sql_where``, ``fix_missing``.
    Returns ``None`` when no filter is set.
    """
    if ds_ids_str:
        # Tolerate "a, b" and trailing commas: surrounding whitespace and empty
        # tokens would otherwise be passed on as ids that match no dataset.
        return [token.strip() for token in ds_ids_str.split(',') if token.strip()]

    if sql_where:
        # NOTE(review): sql_where is interpolated verbatim into the query. A WHERE
        # fragment cannot be passed as a bound parameter, so this must only ever
        # receive trusted operator input - confirm against callers.
        rows = db.select(f'SELECT DISTINCT dataset.id FROM dataset WHERE {sql_where}')
        return [dataset_id for (dataset_id,) in rows]

    if fix_missing:
        logger.info('Checking for missing off-sample jobs...')
        rows = db.select(MISSING_OFF_SAMPLE_SEL)
        missing_ids = [dataset_id for (dataset_id,) in rows]
        logger.info(f'Found {len(missing_ids)} missing off-sample sets')
        return missing_ids

    return None


def run_off_sample(sm_config, ds_ids_str, sql_where, fix_missing, overwrite_existing):
    """Run off-sample classification for the selected datasets and reindex them.

    Args:
        sm_config: Configuration mapping; passed to ``ESExporter`` and its
            ``'services'`` entry is passed to ``classify_dataset_ion_images``.
        ds_ids_str: Comma-separated dataset ids. Takes precedence over the other
            filters. Whitespace around ids and empty entries are ignored.
        sql_where: SQL condition placed after ``WHERE`` in a query on ``dataset``.
            Used only when ``ds_ids_str`` is empty.
        fix_missing: When truthy (and no other filter is given), select the
            datasets returned by the ``MISSING_OFF_SAMPLE_SEL`` query.
        overwrite_existing: Forwarded unchanged to ``classify_dataset_ion_images``.

    Returns:
        None. A warning is logged and nothing is processed when no dataset
        matches the filters.
    """
    db = DB()

    ds_ids = _resolve_off_sample_ds_ids(db, ds_ids_str, sql_where, fix_missing)
    if not ds_ids:
        logger.warning('No datasets match filter')
        return

    es_exp = ESExporter(db, sm_config)
    total = len(ds_ids)
    for i, ds_id in enumerate(ds_ids):
        # Best-effort batch: a failure on one dataset is logged with its
        # traceback and must not stop the remaining datasets.
        try:
            logger.info(f'Running off-sample on {i+1} out of {total}')
            ds = Dataset.load(db, ds_id)
            classify_dataset_ion_images(db, ds, sm_config['services'], overwrite_existing)
            es_exp.reindex_ds(ds_id)
        except Exception:
            logger.error(f'Failed to run off-sample on {ds_id}', exc_info=True)
def migrate_isotopic_images(ds_id):
    """Transfer a dataset's isotopic images and reindex its ES documents.

    Looks up the image ids for ``ds_id`` with the ``SEL_DS_IMG_IDS`` query. The
    transfer and the reindex run only when there are image ids and
    ``_es_docs_migrated`` reports the dataset as not yet migrated.

    Args:
        ds_id: Id of the dataset to migrate.

    Note:
        ``sm_config`` is not a parameter; it is read from the enclosing scope.
    """
    output.print('Migrating isotopic images')

    database = DB()
    iso_image_ids = database.select_onecol(SEL_DS_IMG_IDS, params=(ds_id,))
    exporter = ESExporter(database, sm_config)

    # Nothing to do without images; the migration check is only consulted
    # when there are images to move.
    if not iso_image_ids or _es_docs_migrated(exporter._es, ds_id):
        return

    with timeit():
        output.print('Transferring images...')
        image_count = len(iso_image_ids)
        output.print(image_count)
        transfer_images(ds_id, 'iso_images', image_storage.ISO, iso_image_ids)

    with timeit():
        output.print('Reindexing ES documents...')
        exporter.reindex_ds(ds_id)