Example #1
    def get_matching_files(self):
        """ Gets the files matching the processing parameters.

        Returns
        -------
        results : sqlalchemy.orm.query.Query
            A collection of files represented as a sqlalchemy query object.
        """
        with session_scope(self.session_maker) as session:
            # A file is due for a DI check when its last check is more than
            # 30 days old. The strftime/strptime round trip drops the
            # timezone, yielding a naive datetime comparable to the cast
            # di_date column.
            td = (datetime.datetime.now(pytz.utc) -
                  datetime.timedelta(days=30)).strftime("%Y-%m-%d %H:%M:%S")
            testing_date = datetime.datetime.strptime(td, "%Y-%m-%d %H:%M:%S")
            if self.volume:
                volstr = '%' + self.volume + '%'
                results = session.query(Files).filter(
                    Files.archiveid == self.archive_id,
                    Files.filename.like(volstr)).filter(
                        or_(
                            cast(Files.di_date, Date) < testing_date,
                            # .is_(None) renders as SQL "IS NULL"; a plain
                            # `is None` comparison is evaluated in Python
                            # and is always False.
                            cast(Files.di_date, Date).is_(None)))
            else:
                results = session.query(Files).filter(
                    Files.archiveid == self.archive_id).filter(
                        or_(
                            cast(Files.di_date, Date) < testing_date,
                            cast(Files.di_date, Date).is_(None)))

        return results
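
A note on the NULL check above: comparing a SQLAlchemy column expression with
Python's "is None" tests object identity and always evaluates to False, so the
condition would silently drop out of the query; .is_(None) is what renders as
SQL "IS NULL". A minimal sketch, assuming SQLAlchemy 1.4+ and a hypothetical
stand-in model:

from sqlalchemy import Column, Date, Integer
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class Demo(Base):
    # Hypothetical stand-in for the Files model above.
    __tablename__ = 'demo'
    id = Column(Integer, primary_key=True)
    di_date = Column(Date)

print(Demo.di_date is None)    # False: a Python identity check, never SQL
print(Demo.di_date.is_(None))  # renders as "demo.di_date IS NULL"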
Example #2
def main(user_args):
    pds_id = user_args.pdsid
    delete_from_di = user_args.di
    delete_from_upc = user_args.upc

    if delete_from_di:
        pds_session_maker, pds_engine = db_connect(pds_db)
        with session_scope(pds_session_maker) as session:
            query_res = session.query(Files).filter(
                Files.filename.contains(pds_id))
            num_pds_queries = query_res.count()

            while True:
                print(f'You will be deleting {num_pds_queries} records from '
                      f"the di database {credentials[pds_db]['db']}")
                user_answer = input('Are you sure? [Y/N]: ')

                if user_answer in ('Y', 'N'):
                    break
                print(f'Invalid input: {user_answer}')

            if user_answer == 'Y':
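                # session_scope commits on exit, making these deletes permanent.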
                for record in query_res:
                    session.delete(record)

    if delete_from_upc:
        upc_session_maker, upc_engine = db_connect(upc_db)
        with session_scope(upc_session_maker) as session:
            query_res = session.query(DataFiles).filter(
                DataFiles.productid == pds_id)
            num_upc_queries = query_res.count()
            while True:
                print(f'You will be deleting {num_upc_queries} records from '
                      f"the upc database {credentials[upc_db]['db']}")
                user_answer = input('Are you sure? [Y/N]: ')

                if user_answer in ('Y', 'N'):
                    break
                print(f'Invalid input: {user_answer}')

            if user_answer == 'Y':
                for record in query_res:
                    session.delete(record)
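
The two confirmation loops are identical apart from the record count and the
database name; a small helper like the hypothetical confirm_delete below (not
part of the source module) could factor out the pattern:

def confirm_delete(count, db_name):
    """Prompt until the user answers Y or N; return True only on Y."""
    while True:
        print(f'You will be deleting {count} records from the {db_name} database')
        answer = input('Are you sure? [Y/N]: ')
        if answer in ('Y', 'N'):
            return answer == 'Y'
        print(f'Invalid input: {answer}')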
Example #3
def add_url(input_file, upc_id, session_maker):
    # Map the on-disk derived product path to its served URL and name the
    # browse/thumbnail products after it.
    outputfile = input_file.replace(derived_base, derived_url)
    thumb = outputfile + '.thumbnail.jpg'
    browse = outputfile + '.browse.jpg'
    with session_scope(session_maker) as session:
        q_record = session.query(JsonKeywords).filter(JsonKeywords.upcid == upc_id)
        params = {}
        old_json = q_record.first().jsonkeywords
        old_json['browse'] = browse
        old_json['thumbnail'] = thumb
        params['jsonkeywords'] = old_json

        # synchronize_session=False skips updating objects already loaded in
        # the session; session_scope commits the UPDATE on exit.
        q_record.update(params, synchronize_session=False)
Example #4
def main():
    Session, _ = db_connect(upc_db)

    path = summaries_path

    with session_scope(Session) as session:
        print("Creating Hist Table")
        session.execute(query)
        histogram_qobj = session.query("histogram_summary")
        total_rows = session.execute(
            "SELECT count(*) FROM histogram_summary;").first()[0]
        page_number = 0
        number_of_rows_per_page = 200000
        complete_json_output = []
        print("Paging hist results")
        while True:
            lower_bound = page_number * number_of_rows_per_page
            upper_bound = lower_bound + number_of_rows_per_page

            # Clamp the final page to the rows that actually remain.
            if upper_bound > total_rows:
                number_of_rows_per_page = total_rows - lower_bound

            json_query = "with t AS (SELECT * FROM histogram_summary LIMIT {} OFFSET {}) SELECT json_agg(t) FROM t;".format(
                number_of_rows_per_page, lower_bound)
            output = session.execute(json_query).fetchall()
            complete_json_output.extend([dict(line) for line in output])

            page_number += 1

            if upper_bound > total_rows:
                break

        print("Finished view generation")

    print("Writing Json")
    json_output = json.dumps(complete_json_output)
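    # The output file is opened in append mode, so rerunning main() appends a
    # second JSON document instead of overwriting the first.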
    with open(path + "histogram_summary.json", "a") as json_file:
        json_file.write(json_output)
Example #5
    def get_matching_files(self):
        """ Gets the files matching the processing parameters.

        Returns
        -------
        results : sqlalchemy.orm.query.Query
            A collection of files represented as a sqlalchemy query object.
        """
        with session_scope(self.session_maker) as session:
            if self.volume:
                volstr = '%' + self.volume + '%'
                results = session.query(Files).filter(
                    Files.archiveid == self.archive_id,
                    Files.filename.like(volstr), Files.upc_required == 't')
            else:
                results = session.query(Files).filter(
                    Files.archiveid == self.archive_id,
                    Files.upc_required == 't')
            # Optionally narrow the results by a filename search term.
            if self.search:
                qf = '%' + self.search + '%'
                results = results.filter(Files.filename.like(qf))

        return results
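
SQLAlchemy queries compose generatively: each .filter() call returns a new
Query, which is why the optional search filter can be layered onto the base
query after the fact. A compact sketch of the same pattern, using a
hypothetical stand-in model:

from sqlalchemy import Column, Integer, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class File(Base):
    # Hypothetical stand-in for the Files model.
    __tablename__ = 'files'
    fileid = Column(Integer, primary_key=True)
    archiveid = Column(Integer)
    filename = Column(String)

def build_query(session, archive_id, volume=None, search=None):
    # Conditions stack cleanly because each .filter() returns a new Query.
    query = session.query(File).filter(File.archiveid == archive_id)
    if volume:
        query = query.filter(File.filename.like(f'%{volume}%'))
    if search:
        query = query.filter(File.filename.like(f'%{search}%'))
    return query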
Example #6
def main(user_args):
    upc_session_maker, upc_engine = db_connect(upc_db)

    persist = user_args.persist
    log_level = user_args.log_level
    namespace = user_args.namespace

    # SLURM array variables are only present when running under SLURM.
    try:
        slurm_job_id = os.environ['SLURM_ARRAY_JOB_ID']
        slurm_array_id = os.environ['SLURM_ARRAY_TASK_ID']
    except KeyError:
        slurm_job_id = ''
        slurm_array_id = ''

    inputfile = ''
    context = {'job_id': slurm_job_id, 'array_id': slurm_array_id, 'inputfile': inputfile}
    logger = logging.getLogger('UPC_Process')
    level = logging.getLevelName(log_level)
    logger.setLevel(level)
    log_file_handle = logging.FileHandler(pds_log + 'Process.log')
    formatter = logging.Formatter(
        '%(asctime)s - %(job_id)s - %(array_id)s - %(inputfile)s - %(name)s - %(levelname)s, %(message)s')
    log_file_handle.setFormatter(formatter)
    logger.addHandler(log_file_handle)
    logger = logging.LoggerAdapter(logger, context)
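    # The LoggerAdapter merges `context` into every log record, which is what
    # resolves the custom %(job_id)s / %(array_id)s / %(inputfile)s format
    # fields; because the dict is shared, the context['inputfile'] update
    # below shows up in subsequent log lines.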

    # Redis Queue Objects
    RQ_main = RedisQueue('UPC_UpdateQueue', namespace)
    RQ_work = RedisQueue('UPC_UpdateWorkQueue', namespace)
    logger.info("UPC Update Queue: %s", RQ_main.id_name)

    RQ_error = RedisQueue(upc_error_queue)
    RQ_lock = RedisLock(lock_obj)
    # If the queue isn't registered, add it and set it to "running"
    RQ_lock.add({RQ_main.id_name: '1'})
    # while there are items in the redis queue
    while int(RQ_main.QueueSize()) > 0 and RQ_lock.available(RQ_main.id_name):
        # get a file from the queue
        item = RQ_main.Qfile2Qwork(RQ_main.getQueueName(), RQ_work.getQueueName())
        # Each queue item is a string-serialized tuple of
        # (inputfile, archive, failing_command, update_type).
        inputfile, archive, failing_command, update_type = literal_eval(item)
        upc_id = None

        if not os.path.isfile(inputfile):
            RQ_error.QueueAdd(f'Unable to locate or access {inputfile} during UPC update')
            logger.debug("%s is not a file\n", inputfile)
            exit()

        # Build URL for edr_source
        edr_source = inputfile.replace(workarea, web_base)

        # Update the logger context to include inputfile
        context['inputfile'] = inputfile

        try:
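            # Smoke-test connectivity by opening and immediately closing a
            # session before entering the main processing block.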
            session = upc_session_maker()
            session.close()
        except TypeError as e:
            logger.error("Unable to create a database session/connection to the upc database: %s", e)
            raise e

        try:
            if update_type.lower() == 'upc':

                recipe_file = os.path.join(recipe_base, archive + '.json')
                no_extension_inputfile = os.path.splitext(inputfile)[0]
                cam_info_file = no_extension_inputfile + '_caminfo.pvl'
                footprint_file = no_extension_inputfile + '_footprint.json'
                catlab_output = no_extension_inputfile + '_catlab.pvl'

                with open(recipe_file) as fp:
                    upc_json = json.load(fp)['upc']
                    # The search_term_mapping for the upc process is optional
                    search_term_mapping = upc_json.get('search_term_mapping', {})

                # Some datasets with attached PDS labels cause PVL to hang,
                #  so recipe includes call to dump label using `catlab`
                # If present, use the catlab output as pds_label instead of inputfile
                if os.path.exists(catlab_output):
                    pds_label = pvl.load(catlab_output)
                else:
                    pds_label = pvl.load(inputfile)

                instrument_name = get_instrument_name(pds_label)
                spacecraft_name = get_spacecraft_name(pds_label)
                target_name = get_target_name(pds_label)
                with session_scope(upc_session_maker) as session:
                    target_qobj = Targets.create(session, targetname=target_name,
                                                 displayname=target_name.title(),
                                                 system=target_name)
                    target_id = target_qobj.targetid

                with session_scope(upc_session_maker) as session:
                    instrument_qobj = Instruments.create(session, instrument=instrument_name,
                                                         spacecraft=spacecraft_name)
                    instrument_id = instrument_qobj.instrumentid

                ######## Generate DataFiles Record ########
                datafile_attributes = create_datafiles_atts(pds_label, edr_source, no_extension_inputfile + '.cub')

                datafile_attributes['instrumentid'] = instrument_id
                datafile_attributes['targetid'] = target_id

                with session_scope(upc_session_maker) as session:
                    datafile_qobj = DataFiles.create(session, **datafile_attributes)
                    upc_id = datafile_qobj.upcid

                ######## Generate SearchTerms Record ########
                search_term_attributes = create_search_terms_atts(cam_info_file, upc_id,
                                                                  no_extension_inputfile + '.cub',
                                                                  footprint_file, search_term_mapping)

                search_term_attributes['targetid'] = target_id
                search_term_attributes['instrumentid'] = instrument_id

                with session_scope(upc_session_maker) as session:
                    SearchTerms.create(session, **search_term_attributes)

                ######## Generate JsonKeywords Record ########
                json_keywords_attributes = create_json_keywords_atts(cam_info_file, upc_id, inputfile,
                                                                     failing_command, logger)

                with session_scope(upc_session_maker) as session:
                    JsonKeywords.create(session, **json_keywords_attributes)

            # Derived Processing:

            # If we don't have a upcid, get the matching ID from the database
            if not upc_id:
                with session_scope(upc_session_maker) as session:
                    src = inputfile.replace(workarea, web_base)
                    datafile = session.query(DataFiles).filter(or_(DataFiles.source == src,
                                                                   DataFiles.detached_label == src)).first()
                    if not datafile:
                        RQ_error.QueueAdd(f'No matching upcid was found for {inputfile}, '
                                          'derived product paths could not be added')
                        logger.warning('No matching upcid was found for %s, '
                                       'derived product paths could not be added', inputfile)
                        # Without a upcid there is nothing left to update for
                        # this item, so move on to the next one.
                        continue
                    upc_id = datafile.upcid

            final_path = makedir(inputfile)
            src = os.path.splitext(inputfile)[0]
            derived_product = os.path.join(final_path, os.path.splitext(os.path.basename(inputfile))[0])

            # If derived products exist, copy them to the derived area and add the path to the db
            try:
                shutil.move(src + '.browse.jpg', derived_product + '.browse.jpg')
                shutil.move(src + '.thumbnail.jpg', derived_product + '.thumbnail.jpg')
                add_url(derived_product, upc_id, upc_session_maker)
            except FileNotFoundError:
                RQ_error.QueueAdd(f'Unable to locate or access derived products for {inputfile}')
                logger.warning('Unable to locate or access derived products for %s', inputfile)

            if not persist:
                # Remove all files from the workarea except for the copied
                # source file
                file_prefix = os.path.splitext(inputfile)[0]
                workarea_files = glob(file_prefix + '*')
                # os.remove(os.path.join(workarea, 'print.prt'))
                for file in workarea_files:
                    os.remove(file)

        # Handle SQL specific database errors
        except SQLAlchemyError as e:
            logger.error("Database operation failed: %s \nRequeueing (%s, %s)", e, inputfile, archive)
            RQ_main.QueueAdd((inputfile, archive, failing_command, update_type))
            raise e

        RQ_work.QueueRemove(item)

        # Disconnect from the engines
        upc_engine.dispose()