def get_md5_path_from_db(f_lst, conn, cursor, log_file, proc_id='Unknown',
                         hsm_root=False, dms_root=False, product_id=False,
                         cursor_factory=False):
    """Collect per-product columns for a list of product names.

    Builds the query through ``db_query.get_prod_path_md5_products``, submits
    it, reports the outcome via ``check_error`` and gathers the result rows
    column by column.

    Parameters:
        f_lst: product names, converted with ``convert_list_to_db_str``.
        conn, cursor: database connection and cursor used for the query.
        log_file: log file handed to ``check_error``.
        proc_id: process identifier handed to ``check_error``.
        hsm_root, dms_root: forwarded unchanged to the query builder.
        product_id: when true, guarantees that an ``'id'`` column is part of
            the output (it already is by default).
        cursor_factory: when true, rows are read by column name; otherwise
            they are read by position.

    Returns:
        ``(code, out)`` where ``code`` is the error code of the query and
        ``out`` maps each column name to the list of values read. The lists
        stay empty when the query failed.
    """
    CMD = 'get-prod-md5-path'
    f_lst_str = convert_list_to_db_str(f_lst)
    query = db_query.get_prod_path_md5_products(p_name_lst=f_lst_str,
                                                hsm_root=hsm_root,
                                                dms_root=dms_root)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id, err['code'], CMD, log_file, arg_err=err['msg'])

    keys = ['product_file', 'md5', 'id', 'name']
    # 'id' is already in the default list: appending it unconditionally made
    # every row's id land twice in out['id'] (and made the positional branch
    # read a fifth column).
    if product_id and 'id' not in keys:
        keys.append('id')

    out = {k: [] for k in keys}
    if err['code'] == 0:
        if cursor_factory:
            # Rows support lookup by column name.
            for res in cursor:
                for k in keys:
                    out[k].append(res[k])
        else:
            # Positional rows; assumes the query returns its columns in the
            # same order as `keys` -- TODO confirm against db_query.
            for res in cursor:
                for i, k in enumerate(keys):
                    out[k].append(res[i])
    return err['code'], out
def checkdic(self, keys_list, cursor, version=1, conn=None):
    '''
    Map each key of keys_list to the fields registered for it in
    eodas.field_x_keys for the given version.

    checkdic(list,cursor,*version,*connection)->list,error_code

    Returns:
        ([], code)     when the query fails (code is the query error code);
        (None, 401)    when the table has no row for this version;
        (fieldlist, 0) otherwise, where fieldlist[i] is the list of fields
                       associated with keys_list[i], or None when the key is
                       not known.
    '''
    query = '''SELECT field, keys FROM eodas.field_x_keys WHERE version = {}'''
    err = submit_query(query.format(version), cursor, commit=False, conn=conn)
    if err['code'] != 0:
        # The message is stored under 'msg'; the former 'ms' lookup raised a
        # KeyError and hid the real error code from the caller.
        print(err['msg'])
        return [], err['code']

    if cursor.rowcount == 0:
        return None, 401  # empty reply from the DB

    # Invert the (field, keys) rows into key -> [fields].
    dic_kf = {}
    for row in cursor:
        for kv in row[1]:
            dic_kf.setdefault(kv, []).append(row[0])

    fieldlist = [dic_kf.get(attri) for attri in keys_list]
    return fieldlist, 0
def get_product_md5_and_path(p_name, conn, cursor, log_file, proc_id='Unknown', p_id=None):
    """Look up the md5 and path (optionally the id) of a single product.

    Parameters:
        p_name: product name handed to ``db_query.get_prod_path_md5``.
        conn, cursor: database connection and cursor used for the query.
        log_file, proc_id: forwarded to ``check_error``.
        p_id: when truthy, an ``'id'`` entry is also read from the row.

    Returns:
        ``(code, info)``. ``code`` is the query error code on failure, 501
        when no row was returned and 0 on success. ``info`` always has the
        keys ``'md5'`` and ``'path'`` (plus ``'id'`` when requested and a row
        exists); values are None unless a row was read.
    """
    CMD = 'get-prod-md5-path'
    status = submit_query(db_query.get_prod_path_md5(p_name), cursor, conn=conn)
    check_error(proc_id, status['code'], CMD, log_file, arg_err=status['msg'])

    info = {
        'md5': None,
        'path': None,
    }
    if status['code'] != 0:
        return status['code'], info

    row = cursor.fetchone()
    if not row:
        return 501, info

    if p_id:
        info['id'] = None

    try:
        # First try rows that can be indexed by column name.
        for field in list(info):
            info[field] = row[field]
    except:
        # Fall back to positional rows.
        # NOTE(review): positions follow the iteration order of `info`, which
        # is not guaranteed on older interpreters -- confirm it matches the
        # column order of the query.
        for pos, field in enumerate(list(info)):
            info[field] = row[pos]
    return 0, info
def get_product_md5(p_name, conn, cursor, log_file, proc_id='Unknown', pid=False):
    """Fetch the md5 (and optionally the id) of a single product.

    Parameters:
        p_name: product name handed to ``db_query.get_md5_product``.
        conn, cursor: database connection and cursor used for the query.
        log_file, proc_id: forwarded to ``check_error``.
        pid: when truthy, the product id is returned as a third element.

    Returns:
        ``(code, md5)`` or, when ``pid`` is truthy, ``(code, md5, id)``.
        ``code`` is the query error code on failure, 501 when no row was
        returned and 0 on success; the other elements are None unless a row
        was read.
    """
    CMD = 'get-prod-md5'
    err = submit_query(db_query.get_md5_product(p_name), cursor, conn=conn)
    check_error(proc_id, err['code'], CMD, log_file, arg_err=err['msg'])

    code, md5, prod_id = err['code'], None, None
    if code == 0:
        row = cursor.fetchone()
        if row is None:
            code = 501
        else:
            try:
                # Rows that can be indexed by column name.
                md5 = row['md5']
                if pid:
                    prod_id = row['id']
            except:
                # Positional rows: md5 first, id second.
                md5 = row[0]
                if pid:
                    prod_id = row[1]

    if pid:
        return code, md5, prod_id
    return code, md5
def get_prod_info(media, args, cursor, query, sample_size, log_file,
                  max_attempt=3, exit_on_error=False, conn=None,
                  proc_id='Unknown'):
    """Sample files from a media and fetch md5/path for those that are products.

    Parameters:
        media: media identifier, forwarded to ``get_file_sample`` and used in
            the log message.
        args, sample_size, max_attempt, exit_on_error: forwarded to
            ``get_file_sample``.
        cursor, conn: database cursor and connection used for the query.
        query: callable that receives the SQL tuple literal of product names
            (``"('a','b')"``) and returns the query to submit.
        log_file, proc_id: forwarded to ``check_error``.

    Returns:
        ``(prod_info, n_prod, products)`` on success, where ``prod_info`` maps
        each sampled file to ``{'md5': ..., 'path': ...}``, ``products`` is
        the list of its keys and ``n_prod`` their count.
        ``(None, 0, None)`` when either the sampling or the query fails.
    """
    CMD = 'get-prod_info'
    products, err = get_file_sample(media, args, sample_size, log_file,
                                    max_attempt=max_attempt,
                                    exit_on_error=exit_on_error,
                                    proc_id=proc_id)
    # Every failure returns the same triple: one failure path used to fall
    # off the end of the function and hand a bare None to the caller.
    if err != 0:
        return None, 0, None

    n_sel = len(products)
    # product name -> sampled file it was derived from
    product_names = {get_prod_name(prod): prod for prod in products}
    prod_names = "('" + "','".join(product_names) + "')"

    err = submit_query(query(prod_names), cursor, conn=conn)
    check_error(proc_id, err['code'], CMD, log_file, arg_err=err['msg'])
    if err['code'] != 0:
        return None, 0, None

    # Rows are read as (name, path, md5).
    prod_info = {
        product_names[prod[0]]: {'md5': prod[2], 'path': prod[1]}
        for prod in cursor
    }
    products = list(prod_info)
    n_prod = len(products)
    msg = 'Media {0}: {1} out of {2} of selected files were products'.format(media, n_prod, n_sel)
    check_error(proc_id, 0, CMD, log_file, msg=msg, log_msg=True)
    return prod_info, n_prod, products
def checkdic(self, keys_list, cursor, version=1, conn=None):
    '''
    Map each key of keys_list to the fields registered for it in
    eodas.field_x_keys for the given version.

    checkdic(list,cursor,*version,*connection)->list,error_code

    Returns:
        ([], code)     when the query fails (code is the query error code);
        (None, 401)    when the table has no row for this version;
        (fieldlist, 0) otherwise, where fieldlist[i] is the list of fields
                       associated with keys_list[i], or None when the key is
                       not known.
    '''
    # All (field, keys) rows of the version are fetched in a single query and
    # inverted locally; an earlier variant issued one query per key and was
    # dropped as inefficient.
    query = '''SELECT field, keys FROM eodas.field_x_keys WHERE version = {}'''
    err = submit_query(query.format(version), cursor, commit=False, conn=conn)
    if err['code'] != 0:
        # The query result used to be ignored, so a failed query went on to
        # read from the cursor instead of reporting the error code.
        print(err['msg'])
        return [], err['code']

    if cursor.rowcount == 0:
        return None, 401  # empty reply from the DB

    # Invert the (field, keys) rows into key -> [fields].
    dic_kf = {}
    for row in cursor:
        for kv in row[1]:
            dic_kf.setdefault(kv, []).append(row[0])

    fieldlist = [dic_kf.get(attri) for attri in keys_list]
    return fieldlist, 0
def main():
    """Prepare the ingestion of one product.

    Steps, in order: create the log directory, parse the command line,
    create the per-process working directories and move into them, connect
    to the database, require the product status to be 'NEW', switch it to
    'ACTIVE', then query the product info, the initial path and the
    duplicated products. Every step reports through ``check_error`` with
    ``exit_on_error=True``.
    """
    # proc_id is extended in place below. Without this declaration the
    # augmented assignment makes the name local and raises UnboundLocalError.
    # NOTE(review): assumes a module-level `proc_id` is defined -- confirm.
    global proc_id

    log_dir = str(os_getenv('SYSTEM_LOG'))
    log_dir += '/DL197'
    if not isdir(log_dir):
        makedirs(log_dir)

    # log file
    log_global = os_join(log_dir, 'global_ingestion.log')
    log_zip_chk = os_join(log_dir, 'check_zip_content.log')

    args = process_args(log_global, default_arg)
    product_id = args.product_id
    proc_id += '_' + str(product_id)

    if args.format is None:
        metadata_fromat = 'xml'
    else:
        metadata_fromat = args.format

    if args.pattern is None:
        metadata_pattern = '/*/*.metadata'
    else:
        metadata_pattern = args.pattern

    #--- check processing directories
    pid = str(getpid())
    # NOTE(review): the PROCESSING_DIR based path is immediately replaced by
    # the /tmp one on the next line -- confirm which is intended.
    processing_dir = '{}/{}'.format(os_getenv('PROCESSING_DIR'), pid)
    processing_dir = '{}/{}'.format('/tmp', getpid())
    if not isdir(processing_dir):
        try:
            makedirs(processing_dir)
        except OSError:
            msg = 'Unable to create the directory {}'.format(processing_dir)
            check_error(proc_id, 500, 'create-processing-dir', log_global,
                        exit_on_error=True, arg_err=msg)

    dir_lst = [pid, 'testzipdir', 'testzipdir2']
    for d in dir_lst:
        dTmp = '{}/{}'.format(processing_dir, d)
        if not isdir(dTmp):
            try:
                makedirs(dTmp)
            except OSError:
                msg = 'Unable to create the directory {}'.format(dTmp)
                check_error(proc_id, 500, 'create-processing-dir', log_global,
                            exit_on_error=True, arg_err=msg)

    #--- go to local working directory
    chdir(processing_dir)

    #-- db connection
    conn, cursor, err = db_connect()
    check_error(proc_id, err['code'], 'db-connect', log_global,
                exit_on_error=True)

    #--- Getting the product status
    query = db_query.get_product_status(product_id)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id, err['code'], 'get-product-status', log_global,
                exit_on_error=True, arg_err=err['msg'])
    check_query_res(cursor, 'get-product-status', log_global, conn=conn,
                    exit_on_error=True)
    product_status = cursor.fetchone()[0]

    # check if this is a new attempt to ingest a previously ARCHIVED product
    print('PRODUCT_STATUS : ' + product_status)
    if product_status != 'NEW':
        conn.close()
        check_error(proc_id, 800, 'get-product-status', log_global,
                    exit_on_error=True, arg_err=product_id)

    # update the product status to ACTIVE
    query = db_query.update_product_status(product_id, 'ACTIVE')
    err = submit_query(query, cursor, conn=conn, commit=True)
    check_error(proc_id, err['code'], 'upd-product-status', log_global,
                exit_on_error=True, arg_err=err['msg'])

    # retrieve the ingestion parameters
    query = db_query.get_product_info(product_id)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id, err['code'], 'get-product-info', log_global,
                exit_on_error=True, arg_err=err['msg'])
    check_query_res(cursor, 'get-product-info', log_global, conn=conn,
                    exit_on_error=True)
    dTmp = cursor.fetchone()
    product_name = dTmp[0]
    product_type = dTmp[1]
    print('Product Name: {}'.format(product_name))
    print('Product Type: {}'.format(product_type))

    query = db_query.get_initial_path(product_id)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id, err['code'], 'get-product-info', log_global,
                exit_on_error=True, arg_err=err['msg'])
    check_query_res(cursor, 'get-product-info', log_global, conn=conn,
                    exit_on_error=True)

    query = db_query.get_duplicated_prod(product_id)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id, err['code'], 'get-product-info', log_global,
                exit_on_error=True, arg_err=err['msg'])
def update_prod_info(media, args, cursor, conn, log_file, max_attempt=3,
                     exit_on_error=False, proc_id='Unknown', info=None):
    """Update the status of the products sampled from a media.

    A sample of 100 files is requested through ``get_file_sample``. The
    update is only submitted when none of the sampled products is reported
    by ``count_prod_not_in_tape``; otherwise the offending products are
    logged and error 314 is returned.

    Parameters:
        media: media identifier, used for sampling and in every message.
        args, max_attempt, exit_on_error: forwarded to ``get_file_sample``.
        cursor, conn: database cursor and connection used for the queries.
        log_file, proc_id: forwarded to ``check_error``.
        info: when not None it is forwarded to ``get_file_sample``, which
            then also returns an extra value that is handed back to the
            caller.

    Returns:
        The error dict when ``info`` is None, otherwise ``(err, extra)``.
        ``extra`` is the value returned by ``get_file_sample`` on success
        and ``{}`` on every failure. When the sampling itself fails the
        error dict only carries the ``'code'`` key.
    """
    def _reply(status, extra):
        # The return shape depends only on whether `info` was supplied.
        # Some branches used to test truthiness instead, so a falsy but
        # non-None `info` got a bare dict where a tuple was expected.
        if info is None:
            return status
        return status, extra

    err_msg = 'Media {0}: '.format(media)
    err_msg += '{}'

    CMD = 'update-prod-info-list'
    print('{0} - Media {1}: start query hsm'.format(get_cur_time(), media))
    if info is None:
        products, err = get_file_sample(media, args, 100, log_file,
                                        max_attempt=max_attempt,
                                        exit_on_error=exit_on_error,
                                        proc_id=proc_id)
    else:
        products, err, info_out = get_file_sample(media, args, 100, log_file,
                                                  max_attempt=max_attempt,
                                                  exit_on_error=exit_on_error,
                                                  proc_id=proc_id, info=info)
    print('{0} - Media {1}: end query hsm'.format(get_cur_time(), media))
    if err != 0:
        return _reply({'code': err}, {})

    product_names = [get_prod_name(prod) for prod in products]
    prod_names = "('" + "','".join(product_names) + "')"

    CMD = 'update-prod-info-check'
    # print '{0} - Media {1}: start status check'.format(get_cur_time(), media)
    query = db_query().count_prod_not_in_tape(prod_names)
    err = submit_query(query, cursor, conn=conn)
    check_error(proc_id, err['code'], CMD, log_file,
                arg_err=err_msg.format(err['msg']))
    print('{0} - Media {1}: end status check'.format(get_cur_time(), media))
    if err['code'] != 0:
        return _reply(err, {})

    n_not_tape = cursor.fetchone()[0]
    print('{0} - Media {1}: {2} files do not have tape status'.format(get_cur_time(), media, n_not_tape))
    if n_not_tape > 0:
        err = {'code': 314,
               'msg': '{} out of {} products are not in tape status'.format(n_not_tape, len(product_names))}
        check_error(proc_id, err['code'], CMD, log_file,
                    arg_err=err_msg.format(err['msg']))

        # List the offending products in the log.
        query = db_query().get_prod_not_in_tape(prod_names)
        err_0 = submit_query(query, cursor, conn=conn)
        # Report the outcome of this query, not the 314 message built above.
        check_error(proc_id, err_0['code'], CMD, log_file,
                    arg_err=err_msg.format(err_0['msg']))
        if err_0['code'] == 0:
            # Only read the cursor when the listing query succeeded.
            for c in cursor:
                msg = '{0} - Media {1}: {2}, {3}, {4}'.format(' '*19, media, c[0], c[1], c[2])
                print(msg)
                check_error(proc_id, 314, CMD, log_file, arg_err=msg)
        return _reply(err, {})

    CMD = 'update-prod-info'
    print('{0} - Media {1}: start status update'.format(get_cur_time(), media))
    query = db_query().update_prod_status(prod_names)
    err = submit_query(query, cursor, conn=conn, commit=True)
    check_error(proc_id, err['code'], CMD, log_file,
                arg_err=err_msg.format(err['msg']))
    print('{0} - Media {1}: end status update'.format(get_cur_time(), media))

    if info is None:
        return err
    return err, info_out