def migrate_isotopic_images(ds_id):
    """Transfer a dataset's isotopic ion images and reindex its ES documents.

    Looks up the dataset's image ids and, unless there is nothing to copy or
    the ES documents were already migrated by a previous run, transfers the
    images to image storage and rebuilds the ElasticSearch documents.
    """
    output.print('Migrating isotopic images')

    db = DB()
    image_ids = db.select_onecol(SEL_DS_IMG_IDS, params=(ds_id,))
    es_exporter = ESExporter(db, sm_config)

    # Guard: skip datasets with no images, or ones already migrated.
    if not image_ids or _es_docs_migrated(es_exporter._es, ds_id):
        return

    with timeit():
        output.print('Transferring images...')
        output.print(len(image_ids))
        transfer_images(ds_id, 'iso_images', image_storage.ISO, image_ids)

    with timeit():
        output.print('Reindexing ES documents...')
        es_exporter.reindex_ds(ds_id)
# Example 2
def del_jobs(ds: Dataset, moldb_ids: Optional[Iterable[int]] = None):
    """
    Delete a dataset's jobs for the specified moldbs, or all jobs if moldb_ids is None.
    Also cleans up the annotations from ElasticSearch and deletes their ion images.
    """
    db = DB()
    es = ESExporter(db)

    if moldb_ids is None:
        moldb_ids = get_ds_moldb_ids(ds.id)
    # Materialize once up front: moldb_ids may be a one-shot iterator, and it
    # is consumed both by find_by_ids below and by the job-ids query. The
    # original code iterated it twice, which yields an empty list the second
    # time for generator inputs.
    moldb_ids = list(moldb_ids)
    moldbs = molecular_db.find_by_ids(moldb_ids)

    # Reuse the existing connection instead of constructing a second DB().
    job_ids = db.select_onecol(
        'SELECT j.id FROM job j WHERE ds_id = %s AND moldb_id = ANY(%s)',
        (ds.id, moldb_ids))
    del_diagnostics(ds.id, job_ids)

    for moldb in moldbs:
        logger.info(
            f'Deleting isotopic images: ds_id={ds.id} ds_name={ds.name} moldb={moldb}'
        )
        img_id_rows = db.select_onecol(
            'SELECT iso_image_ids '
            'FROM annotation m '
            'JOIN job j ON j.id = m.job_id '
            'JOIN dataset d ON d.id = j.ds_id '
            'WHERE ds_id = %s AND j.moldb_id = %s',
            (ds.id, moldb.id),
        )

        # Flatten the per-annotation id lists, dropping NULL placeholders.
        image_ids = [
            img_id for img_ids in img_id_rows for img_id in img_ids
            if img_id is not None
        ]
        image_storage.delete_images(image_storage.ISO, ds.id, image_ids)

        logger.info(
            f"Deleting job results: ds_id={ds.id} ds_name={ds.name} moldb={moldb}"
        )
        db.alter('DELETE FROM job WHERE ds_id = %s and moldb_id = %s',
                 (ds.id, moldb.id))
        es.delete_ds(ds.id, moldb)
def find_dataset_ids(ds_ids_param, sql_where, missing, failed, succeeded):
    """Resolve which dataset ids to process.

    Datasets may be named directly (comma-separated ds_ids_param), selected
    via a raw SQL WHERE clause, and/or filtered by diagnostic status flags
    (missing / failed / succeeded). When nothing at all is specified, falls
    back to all FINISHED datasets that are missing diagnostics. Raises
    AssertionError if the result is empty.
    """
    db = DB()

    # Explicitly requested datasets, if any; None means "no explicit list".
    if ds_ids_param:
        requested = ds_ids_param.split(',')
    elif sql_where:
        requested = db.select_onecol(
            f"SELECT id FROM dataset WHERE {sql_where}")
    else:
        requested = None

    if not missing:
        # Default to processing all datasets missing diagnostics
        missing = requested is None and not failed and not succeeded

    # Diagnostic/error counts per FINISHED dataset.
    ds_type_counts = db.select(
        'SELECT d.id, COUNT(DISTINCT dd.type), COUNT(dd.error) '
        'FROM dataset d LEFT JOIN dataset_diagnostic dd on d.id = dd.ds_id '
        'WHERE d.status = \'FINISHED\' '
        'GROUP BY d.id')

    if not (missing or failed or succeeded):
        ds_ids = requested
    else:
        # Get ds_ids based on status (or filter specified ds_ids on status)
        matching = set()
        for ds_id, n_diagnostics, n_errors in ds_type_counts:
            if missing and (n_diagnostics or 0) < len(DiagnosticType):
                matching.add(ds_id)
            elif failed and n_errors > 0:
                matching.add(ds_id)
            elif (succeeded and n_diagnostics == len(DiagnosticType)
                  and n_errors == 0):
                matching.add(ds_id)

        if requested is not None:
            # Keep order, if directly specified
            ds_ids = [ds_id for ds_id in requested if ds_id in matching]
        else:
            # Order by ID descending, so that newer DSs are updated first
            ds_ids = sorted(matching, reverse=True)

    assert ds_ids, 'No datasets found'
    return ds_ids