コード例 #1
0
ファイル: file_utils.py プロジェクト: cactusspine/work30
def get_md5_path_from_db(f_lst,
                         conn,
                         cursor,
                         log_file,
                         proc_id='Unknown',
                         hsm_root=False,
                         dms_root=False,
                         product_id=False,
                         cursor_factory=False):
    """Fetch file path, md5, id and name for a list of products.

    Args:
        f_lst: product names; converted to a DB list string with
            ``convert_list_to_db_str``.
        conn: DB connection, forwarded to ``submit_query``.
        cursor: DB cursor used to run the query and read the rows.
        log_file: log file handed to ``check_error``.
        proc_id: process identifier used when logging.
        hsm_root, dms_root: forwarded unchanged to the query builder.
        product_id: kept for backward compatibility. The ``id`` column
            is always part of the result, so this flag no longer changes
            the output.
        cursor_factory: truthy when rows can be indexed by column name;
            otherwise rows are read by position.

    Returns:
        ``(code, out)`` where ``code`` is ``err['code']`` from the query
        and ``out`` maps each of ``product_file``, ``md5``, ``id`` and
        ``name`` to a list with one entry per row (empty lists when the
        query failed).
    """
    CMD = 'get-prod-md5-path'
    f_lst_str = convert_list_to_db_str(f_lst)

    query = db_query.get_prod_path_md5_products(p_name_lst=f_lst_str,
                                                hsm_root=hsm_root,
                                                dms_root=dms_root)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id, err['code'], CMD, log_file, arg_err=err['msg'])

    # The query is built without reference to ``product_id``, so the
    # column set is fixed.  Appending 'id' a second time (as was done
    # before) stored every id twice for dict rows and read a fifth,
    # non-requested column for tuple rows.
    keys = ['product_file', 'md5', 'id', 'name']
    out = {k: [] for k in keys}

    if err['code'] != 0:
        return err['code'], out

    for res in cursor:
        if cursor_factory:
            for k in keys:
                out[k].append(res[k])
        else:
            # Positional access relies on the query returning the
            # columns in the same order as ``keys``.
            for i, k in enumerate(keys):
                out[k].append(res[i])
    return err['code'], out
コード例 #2
0
ファイル: extractMetadata.py プロジェクト: cactusspine/work30
    def checkdic(self, keys_list, cursor, version=1, conn=None):
        '''
        Map each key of ``keys_list`` to the DB fields associated with it.

        Reads every (field, keys) row of eodas.field_x_keys for the given
        version and builds a key -> [fields] lookup from it.

        checkdic(list,cursor,*version,*connection)->list,error_code

        Returns:
            ([], code)      when the query fails (code is err['code']);
            (None, 401)     when the query returns no rows;
            (fieldlist, 0)  otherwise, where fieldlist[i] is the list of
                            fields for keys_list[i], or None when that
                            key is unknown.
        '''
        # NOTE: ``version`` is interpolated directly into the SQL text,
        # so callers must only pass a trusted value.
        query = '''SELECT field, keys
                    FROM  eodas.field_x_keys
                    WHERE version = {}'''
        err = submit_query(query.format(version),
                           cursor,
                           commit=False,
                           conn=conn)
        if err['code'] != 0:
            # The error dict carries its text under 'msg' (the former
            # 'ms' lookup raised KeyError on this path).
            print(err['msg'])
            return [], err['code']

        if cursor.rowcount == 0:
            return None, 401  # empty reply from the DB

        dic_kf = {}
        for row in cursor:
            # row[0] is the field, row[1] the iterable of its keys; one
            # key may be associated with several fields.
            for kv in row[1]:
                dic_kf.setdefault(kv, []).append(row[0])

        fieldlist = [dic_kf.get(attri) for attri in keys_list]
        return fieldlist, 0
コード例 #3
0
ファイル: file_utils.py プロジェクト: cactusspine/work30
def get_product_md5_and_path(p_name,
                             conn,
                             cursor,
                             log_file,
                             proc_id='Unknown',
                             p_id=None):
    """Return the md5 and path (optionally the id) of one product.

    Args:
        p_name: product name passed to ``db_query.get_prod_path_md5``.
        conn: DB connection, forwarded to ``submit_query``.
        cursor: DB cursor used to run the query and fetch the row.
        log_file: log file handed to ``check_error``.
        proc_id: process identifier used when logging.
        p_id: when truthy, an ``'id'`` entry is added to the result for
            a product that was found.

    Returns:
        ``(err_code, info)``. ``err_code`` is the query error code,
        ``501`` when no row was returned, or ``0`` on success. ``info``
        always has ``'md5'`` and ``'path'`` (``None`` unless a row was
        found).
    """
    CMD = 'get-prod-md5-path'
    query = db_query.get_prod_path_md5(p_name)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id, err['code'], CMD, log_file, arg_err=err['msg'])

    info = {
        'md5': None,
        'path': None,
    }
    if err['code'] != 0:
        return err['code'], info

    res = cursor.fetchone()
    if not res:
        return 501, info

    # Explicit column order: iterating ``info.keys()`` gave an arbitrary
    # order on Python 2, which made the positional fallback unreliable.
    # NOTE(review): assumes the query selects md5, path[, id] in this
    # order -- confirm against db_query.get_prod_path_md5.
    columns = ['md5', 'path']
    if p_id:
        columns.append('id')
        info['id'] = None

    try:
        # Rows addressable by column name (dict-like cursor).
        values = [res[name] for name in columns]
    except (TypeError, KeyError, IndexError):
        # Plain sequence rows: fall back to positional access.
        values = [res[pos] for pos in range(len(columns))]

    info.update(zip(columns, values))
    return 0, info
コード例 #4
0
ファイル: file_utils.py プロジェクト: cactusspine/work30
def get_product_md5(p_name,
                    conn,
                    cursor,
                    log_file,
                    proc_id='Unknown',
                    pid=False):
    """Return the md5 (and optionally the id) of one product.

    Args:
        p_name: product name passed to ``db_query.get_md5_product``.
        conn: DB connection, forwarded to ``submit_query``.
        cursor: DB cursor used to run the query and fetch the row.
        log_file: log file handed to ``check_error``.
        proc_id: process identifier used when logging.
        pid: when truthy the product id is returned as a third element.

    Returns:
        ``(code, md5)`` or, when ``pid`` is truthy, ``(code, md5, id)``.
        ``code`` is the query error code, ``501`` when no row was
        returned, or ``0`` on success; the other elements are ``None``
        unless ``code`` is ``0``.
    """
    CMD = 'get-prod-md5'
    query = db_query.get_md5_product(p_name)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id, err['code'], CMD, log_file, arg_err=err['msg'])

    code, md5, prod_id = err['code'], None, None
    if code == 0:
        res = cursor.fetchone()
        if res is None:
            code = 501
        else:
            try:
                # Rows addressable by column name (dict-like cursor).
                md5 = res['md5']
                if pid:
                    prod_id = res['id']
            except (TypeError, KeyError, IndexError):
                # Plain sequence rows: md5 is column 0, id column 1.
                md5 = res[0]
                if pid:
                    prod_id = res[1]

    if pid:
        return code, md5, prod_id
    return code, md5
0
ファイル: tape_utils.py プロジェクト: cactusspine/work30
def get_prod_info(media, args, cursor, query, sample_size, log_file,
                  max_attempt=3, exit_on_error=False, conn=None,
                  proc_id='Unknown'):
    """Sample files from a media and look up their md5/path in the DB.

    Args:
        media: media identifier, forwarded to ``get_file_sample`` and
            used in the log message.
        args: forwarded unchanged to ``get_file_sample``.
        cursor: DB cursor used to run the query and read the rows.
        query: callable that receives the SQL tuple string of product
            names (``('a','b',...)``) and returns the query to submit.
        sample_size: forwarded to ``get_file_sample``.
        log_file: log file handed to ``check_error``.
        max_attempt, exit_on_error: forwarded to ``get_file_sample``.
        conn: DB connection, forwarded to ``submit_query``.
        proc_id: process identifier used when logging.

    Returns:
        ``(prod_info, n_prod, products)`` where ``prod_info`` maps each
        sampled file found in the DB to ``{'md5': ..., 'path': ...}``,
        ``products`` is the list of those files and ``n_prod`` its
        length.  ``(None, 0, None)`` when sampling or the query fails.
    """
    CMD = 'get-prod_info'
    products, err = get_file_sample(media, args, sample_size, log_file,
                                    max_attempt=max_attempt,
                                    exit_on_error=exit_on_error,
                                    proc_id=proc_id)
    if err != 0:
        # Bail out before touching ``products``: its content is not
        # known to be usable when sampling failed.
        return None, 0, None

    n_sel = len(products)
    # product name -> sampled file it was derived from
    product_names = {get_prod_name(prod): prod for prod in products}
    prod_names = "('" + "','".join(product_names) + "')"

    err = submit_query(query(prod_names), cursor, conn=conn)
    check_error(proc_id, err['code'], CMD, log_file, arg_err=err['msg'])
    if err['code'] != 0:
        # Previously execution fell through and hit an undefined
        # ``prod_info``; report the failure like a sampling error.
        return None, 0, None

    # Each row is read as (name, path, md5).
    prod_info = {product_names[row[0]]: {'md5': row[2], 'path': row[1]}
                 for row in cursor}

    products = list(prod_info)
    n_prod = len(products)
    msg = 'Media {0}: {1} out of {2} of selected files were products'.format(
        media, n_prod, n_sel)
    check_error(proc_id, 0, CMD, log_file, msg=msg, log_msg=True)
    return prod_info, n_prod, products
コード例 #6
0
    def checkdic(self, keys_list, cursor, version=1, conn=None):
        '''
        Map each key of ``keys_list`` to the DB fields associated with it.

        A single query loads every (field, keys) row of
        eodas.field_x_keys for the given version; the lookup is then done
        in memory instead of issuing one query per key.

        checkdic(list,cursor,*version,*connection)->list,error_code

        Returns:
            ([], code)      when the query fails (code is err['code']);
            (None, 401)     when the query returns no rows;
            (fieldlist, 0)  otherwise, where fieldlist[i] is the list of
                            fields for keys_list[i], or None when that
                            key is unknown.
        '''
        # NOTE: ``version`` is interpolated directly into the SQL text,
        # so callers must only pass a trusted value.
        query = '''SELECT field, keys
                    FROM  eodas.field_x_keys
                    WHERE version = {}'''
        err = submit_query(query.format(version),
                           cursor,
                           commit=False,
                           conn=conn)
        if err['code'] != 0:
            # Do not read from the cursor after a failed query.
            print(err['msg'])
            return [], err['code']

        if cursor.rowcount == 0:
            return None, 401  # empty reply from the DB

        dic_kf = {}
        for row in cursor:
            # row[0] is the field, row[1] the iterable of its keys; one
            # key may be associated with several fields.
            for kv in row[1]:
                dic_kf.setdefault(kv, []).append(row[0])

        fieldlist = [dic_kf.get(attri) for attri in keys_list]
        return fieldlist, 0
コード例 #7
0
def main():
    """Entry point of the product ingestion script (visible portion).

    Steps performed by the code below, in order:
      1. build the log directory ``<SYSTEM_LOG>/DL197`` and the log paths;
      2. parse the arguments and read the product id, metadata format
         and metadata pattern (with defaults);
      3. create a per-process working directory plus sub-directories
         and ``chdir`` into it;
      4. connect to the DB and require the product status to be 'NEW',
         then set it to 'ACTIVE';
      5. read the product name/type and run the initial-path and
         duplicated-product queries.

    Every step is reported through ``check_error`` with
    ``exit_on_error=True``.
    """

    # NOTE(review): if SYSTEM_LOG is unset, str(None) yields 'None' and
    # the logs go to the relative directory 'None/DL197' -- confirm the
    # variable is always defined.
    log_dir = str(os_getenv('SYSTEM_LOG'))
    log_dir += '/DL197'
    if not isdir(log_dir):
        makedirs(log_dir)
    # log files (log_zip_chk is not used in the visible part of main)
    log_global = os_join(log_dir, 'global_ingestion.log')
    log_zip_chk = os_join(log_dir, 'check_zip_content.log')

    args = process_args(log_global, default_arg)
    product_id = args.product_id
    # NOTE(review): proc_id is augmented here without a prior assignment
    # or a `global proc_id` statement in this function, which makes it a
    # local name and raises UnboundLocalError -- verify how proc_id is
    # meant to be initialised.
    proc_id += '_' + str(product_id)
    # Metadata format, defaulting to 'xml'.
    # NOTE(review): 'metadata_fromat' looks like a typo of
    # 'metadata_format'; it is not read in the visible part of main.
    if args.format is None:
        metadata_fromat = 'xml'
    else:
        metadata_fromat = args.format
    # Glob pattern for metadata files, defaulting to '/*/*.metadata'.
    if args.pattern is None:
        metadata_pattern = '/*/*.metadata'
    else:
        metadata_pattern = args.pattern

    #--- check processing directories
    pid = str(getpid())
    # NOTE(review): the PROCESSING_DIR-based path is overwritten on the
    # next line by a /tmp path -- presumably a leftover debug override;
    # confirm which one is intended.
    processing_dir = '{}/{}'.format(os_getenv('PROCESSING_DIR'), pid)
    processing_dir = '{}/{}'.format('/tmp', getpid())
    if not isdir(processing_dir):
        try:
            makedirs(processing_dir)
        except:
            # Any failure creating the directory is reported as code 500.
            msg = 'Unable to create the directory {}'.format(processing_dir)
            check_error(proc_id,
                        500,
                        'create-processing-dir',
                        log_global,
                        exit_on_error=True,
                        arg_err=msg)

    # Sub-directories created inside the processing directory.
    dir_lst = [pid, 'testzipdir', 'testzipdir2']
    for d in dir_lst:
        dTmp = '{}/{}'.format(processing_dir, d)
        if not isdir(dTmp):
            try:
                makedirs(dTmp)
            except:
                msg = 'Unable to create the directory {}'.format(dTmp)
                check_error(proc_id,
                            500,
                            'create-processing-dir',
                            log_global,
                            exit_on_error=True,
                            arg_err=msg)
    #--- go to local working directory
    chdir(processing_dir)

    #-- db connection
    conn, cursor, err = db_connect()
    check_error(proc_id,
                err['code'],
                'db-connect',
                log_global,
                exit_on_error=True)

    #--- Getting the product status
    query = db_query.get_product_status(product_id)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id,
                err['code'],
                'get-product-status',
                log_global,
                exit_on_error=True,
                arg_err=err['msg'])
    check_query_res(cursor,
                    'get-product-status',
                    log_global,
                    conn=conn,
                    exit_on_error=True)

    # First column of the single fetched row is the status string.
    product_status = cursor.fetchone()[0]

    # check if this is a new attempt to ingest a previously ARCHIVED product
    print('PRODUCT_STATUS : ' + product_status)
    if product_status != 'NEW':
        # Only products in status 'NEW' are processed: close the
        # connection and report error 800 with the product id.
        conn.close()
        check_error(proc_id,
                    800,
                    'get-product-status',
                    log_global,
                    exit_on_error=True,
                    arg_err=product_id)

    # update the product status to ACTIVE
    query = db_query.update_product_status(product_id, 'ACTIVE')
    err = submit_query(query, cursor, conn=conn, commit=True)
    check_error(proc_id,
                err['code'],
                'upd-product-status',
                log_global,
                exit_on_error=True,
                arg_err=err['msg'])

    # retrieve the ingestion parameters
    query = db_query.get_product_info(product_id)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id,
                err['code'],
                'get-product-info',
                log_global,
                exit_on_error=True,
                arg_err=err['msg'])

    check_query_res(cursor,
                    'get-product-info',
                    log_global,
                    conn=conn,
                    exit_on_error=True)
    # Row layout used below: column 0 = product name, column 1 = type.
    dTmp = cursor.fetchone()
    product_name = dTmp[0]
    product_type = dTmp[1]
    print 'Product Name: {}'.format(product_name)
    print 'Product Type: {}'.format(product_type)

    # Initial path of the product; the result is not read in the
    # visible part of main.  Logged under the 'get-product-info' tag.
    query = db_query.get_initial_path(product_id)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id,
                err['code'],
                'get-product-info',
                log_global,
                exit_on_error=True,
                arg_err=err['msg'])

    check_query_res(cursor,
                    'get-product-info',
                    log_global,
                    conn=conn,
                    exit_on_error=True)

    # Duplicated-product lookup; also logged under 'get-product-info'.
    query = db_query.get_duplicated_prod(product_id)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id,
                err['code'],
                'get-product-info',
                log_global,
                exit_on_error=True,
                arg_err=err['msg'])
コード例 #8
0
ファイル: tape_utils.py プロジェクト: cactusspine/work30
def update_prod_info(media, args, cursor, conn, log_file,
                     max_attempt=3, exit_on_error=False, proc_id='Unknown',
                     info=None):
    """Update the DB status of the products sampled from a media.

    A sample of 100 files is requested through ``get_file_sample``.  The
    update is only issued when none of the sampled products is reported
    by the ``count_prod_not_in_tape`` query; otherwise the offending
    products are logged and error 314 is returned.

    Args:
        media: media identifier, used for sampling and in messages.
        args: forwarded unchanged to ``get_file_sample``.
        cursor: DB cursor used to run the queries and read the rows.
        conn: DB connection, forwarded to ``submit_query``.
        log_file: log file handed to ``check_error``.
        max_attempt, exit_on_error: forwarded to ``get_file_sample``.
        proc_id: process identifier used when logging.
        info: when not ``None`` it is forwarded to ``get_file_sample``
            and a second value is returned (see below).

    Returns:
        ``err`` when ``info`` is ``None``; ``(err, info_out)`` otherwise,
        where ``info_out`` is the third value returned by
        ``get_file_sample`` on success and ``{}`` on any failure.
        ``err`` is a dict with at least a ``'code'`` entry.
    """
    err_msg = 'Media {0}: '.format(media)
    err_msg += '{}'

    # Decide the return shape once.  The former mix of ``info is None``
    # and ``if info:`` returned a bare dict for a falsy, non-None
    # ``info`` even though the caller expects a pair.
    with_info = info is not None

    def _reply(status, extra):
        # Shape the result according to whether ``info`` was supplied.
        if with_info:
            return status, extra
        return status

    CMD = 'update-prod-info-list'
    print('{0} - Media {1}: start query hsm'.format(get_cur_time(), media))
    info_out = None
    if with_info:
        products, err, info_out = get_file_sample(media, args, 100, log_file,
                                                  max_attempt=max_attempt,
                                                  exit_on_error=exit_on_error,
                                                  proc_id=proc_id, info=info)
    else:
        products, err = get_file_sample(media, args, 100, log_file,
                                        max_attempt=max_attempt,
                                        exit_on_error=exit_on_error,
                                        proc_id=proc_id)
    print('{0} - Media {1}: end query hsm'.format(get_cur_time(), media))
    if err != 0:
        return _reply({'code': err}, {})

    product_names = [get_prod_name(prod) for prod in products]
    # SQL tuple literal: ('a','b',...)
    prod_names = "('" + "','".join(product_names) + "')"

    CMD = 'update-prod-info-check'
    # NOTE(review): db_query is called here, whereas other snippets use
    # it as a module -- presumably a class/factory in this file; confirm.
    query = db_query().count_prod_not_in_tape(prod_names)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id, err['code'], CMD, log_file,
                arg_err=err_msg.format(err['msg']))
    print('{0} - Media {1}: end status check'.format(get_cur_time(), media))
    if err['code'] != 0:
        return _reply(err, {})

    n_not_tape = cursor.fetchone()[0]
    print('{0} - Media {1}: {2} files do not have tape status'.format(
        get_cur_time(), media, n_not_tape))
    if n_not_tape > 0:
        err = {'code': 314,
               'msg': '{} out of {} products are not in tape status'.format(
                   n_not_tape, len(product_names))}
        check_error(proc_id, err['code'], CMD, log_file,
                    arg_err=err_msg.format(err['msg']))

        # List the offending products in the log.
        query = db_query().get_prod_not_in_tape(prod_names)
        err_0 = submit_query(query, cursor, conn=conn)
        # Report the outcome of *this* query (err_0), not the 314 text.
        check_error(proc_id, err_0['code'], CMD, log_file,
                    arg_err=err_msg.format(err_0['msg']))

        if err_0['code'] == 0:
            # Only read rows when the detail query actually succeeded.
            for c in cursor:
                # 19 spaces stand in for the timestamp column so the
                # detail lines align with the timestamped ones.
                msg = '{0} - Media {1}: {2}, {3}, {4}'.format(
                    ' ' * 19, media, c[0], c[1], c[2])
                print(msg)
                check_error(proc_id, 314, CMD, log_file, arg_err=msg)

        return _reply(err, {})

    CMD = 'update-prod-info'
    print('{0} - Media {1}: start status update'.format(get_cur_time(), media))
    query = db_query().update_prod_status(prod_names)
    err = submit_query(query, cursor, conn=conn, commit=True)
    check_error(proc_id, err['code'], CMD, log_file,
                arg_err=err_msg.format(err['msg']))
    print('{0} - Media {1}: end status update'.format(get_cur_time(), media))

    return _reply(err, info_out)