# Module-level imports reconstructed for this excerpt (the original import block
# is not shown, so treat the exact module names as an assumption); `logger`,
# `bibupload_merger`, `merge_bibcodes` and BIBCODES_TO_DELETE_LIST are assumed
# to be defined or imported elsewhere in this module.
import inspect
import logging
import multiprocessing
import os
import pickle

import libxml2
from invenio import bibrecord
from invenio.bibtask import task_low_level_submission

import write_files  # pipeline helper module (assumed name)
import pipeline_settings
import pipeline_settings as settings  # the code below uses both names


def process_bibcodes_to_delete(extraction_directory, upload_mode):
    """Method that creates the MarcXML for the bibcodes to delete."""
    logger.info("In function %s" % (inspect.stack()[0][3],))
    #I create a single document for all the bibcodes to delete:
    #I don't think it's necessary to split the content in groups, since the XML is really simple
    #I create the base object for the tree
    doc = libxml2.newDoc("1.0")
    root = doc.newChild(None, "collection", None)
    #then for each bibcode to delete I create the proper record
    for bibcode in BIBCODES_TO_DELETE_LIST:
        record = root.newChild(None, 'record', None)
        #I add to the record the 2 necessary datafields
        d970 = record.newChild(None, 'datafield', None)
        d970.setProp('tag', '970')
        d970.setProp('ind1', '')
        d970.setProp('ind2', '')
        #I create the subfield tag, escaping "&" since libxml2 does not escape the text passed to newChild
        sub = d970.newChild(None, 'subfield', bibcode.replace('&', '&amp;'))
        sub.setProp("code", "a")
        d980 = record.newChild(None, 'datafield', None)
        d980.setProp('tag', '980')
        d980.setProp('ind1', '')
        d980.setProp('ind2', '')
        #I create the subfield tag
        sub = d980.newChild(None, 'subfield', "DELETED")
        sub.setProp("code", "c")
    #I serialize the XML tree to a string
    marcxml_string = doc.serialize('UTF-8', 1)
    #I free the tree
    doc.freeDoc()
    del doc
    #I write the bibcodes in the done bibcodes file
    w2f = write_files.WriteFile(extraction_directory, logger)
    w2f.write_done_bibcodes_to_file(BIBCODES_TO_DELETE_LIST)
    del w2f
    if upload_mode == 'concurrent':
        #I transform the XML in bibrecords
        bibrecord_object = [elem[0] for elem in bibrecord.create_records(marcxml_string)]
        #I upload the result with option append
        logger.warning('Upload of records to delete started.')
        bibupload_merger(bibrecord_object, logger, 'append')
        logger.warning('Upload of records to delete ended.')
    elif upload_mode == 'bibupload':
        filepath = os.path.join(settings.BASE_OUTPUT_PATH, extraction_directory,
                                settings.BASE_BIBRECORD_FILES_DIR,
                                settings.BIBCODE_TO_DELETE_OUT_NAME)
        with open(filepath, 'w') as marcxml_to_del_file:
            marcxml_to_del_file.write(marcxml_string)
        task_low_level_submission('bibupload', 'admin', '-a', filepath)
        logger.warning('File "%s" submitted to bibupload.' % filepath)
    else:
        logger.error('Upload mode "%s" not supported! File not uploaded.' % upload_mode)
    return True
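

# A minimal usage sketch (not part of the original module; the helper name and
# the bibcodes are hypothetical). For each bibcode the function above builds a
# record of the form
#   <record>
#     <datafield tag="970" ind1="" ind2=""><subfield code="a">BIBCODE</subfield></datafield>
#     <datafield tag="980" ind1="" ind2=""><subfield code="c">DELETED</subfield></datafield>
#   </record>
def delete_bibcodes_example(extraction_directory):
    """Hypothetical driver: mark two fake bibcodes as deleted via bibupload."""
    #in-place update so the module-level list seen by the function changes too
    BIBCODES_TO_DELETE_LIST[:] = ['2001test.........1A', '2001test.........2B']
    return process_bibcodes_to_delete(extraction_directory, 'bibupload')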


def upload_process(q_uplfile, lock_stdout, lock_donefiles, q_life, extraction_directory, extraction_name, upload_mode):
    """Worker that uploads the data in Invenio."""
    logger.warning(multiprocessing.current_process().name + ' (upload worker) Process started')
    #I create a local logger
    fh = logging.FileHandler(os.path.join(pipeline_settings.BASE_OUTPUT_PATH, extraction_directory,
                                          pipeline_settings.BASE_LOGGING_PATH,
                                          multiprocessing.current_process().name + '_uploader_bibcodes.log'))
    fmt = logging.Formatter(pipeline_settings.LOGGING_FORMAT)
    fh.setFormatter(fmt)
    local_logger = logging.getLogger(pipeline_settings.LOGGING_UPLOAD_NAME)
    local_logger.addHandler(fh)
    local_logger.setLevel(logger.level)
    local_logger.propagate = False
    #I print the same message to the local logger
    local_logger.warning(multiprocessing.current_process().name + ' Process started')
    while True:
        file_to_upload = q_uplfile.get()
        if len(file_to_upload) == 2:
            local_logger.info('Processing group "%s" with file "%s"' % (file_to_upload[0], file_to_upload[1]))
        else:
            local_logger.info('Message in queue "%s"' % file_to_upload[0])
        #first of all I check if the item I got is a message from the manager saying that the workers are done
        if file_to_upload[0] == 'WORKERS DONE':
            local_logger.info('No more workers active: stopping the upload...')
            break
        else:
            #otherwise I have to upload the file
            try:
                filepath = file_to_upload[1]
            except IndexError:
                logger.error('Received the unexpected message "%s" from the upload queue.' % file_to_upload[0])
                break
            if upload_mode == 'concurrent':
                #I load the pickled records from the file
                local_logger.warning('Upload of the group "%s" started' % file_to_upload[0])
                with open(filepath, 'rb') as file_obj:
                    merged_records = pickle.load(file_obj)
                #finally I upload
                bibupload_merger(merged_records, local_logger, 'replace_or_insert')
                #I log that I uploaded the file
                lock_donefiles.acquire()
                with open(os.path.join(settings.BASE_OUTPUT_PATH, extraction_directory, settings.LIST_BIBREC_UPLOADED), 'a') as bibrec_file_obj:
                    bibrec_file_obj.write(filepath + '\n')
                lock_donefiles.release()
                local_logger.warning('Upload of the group "%s" ended' % file_to_upload[0])
                del merged_records
            elif upload_mode == 'bibupload':
                task_low_level_submission('bibupload', 'admin', '-i', '-r', '--pickled-input-file', '--update-mode', filepath)
                #I log that I submitted the file, protecting the shared done-files list with the lock
                lock_donefiles.acquire()
                with open(os.path.join(settings.BASE_OUTPUT_PATH, extraction_directory, settings.LIST_BIBREC_UPLOADED), 'a') as bibrec_file_obj:
                    bibrec_file_obj.write(filepath + '\n')
                lock_donefiles.release()
                local_logger.warning('File "%s" submitted to bibupload.' % filepath)
            else:
                local_logger.error('Upload mode "%s" not supported! File not uploaded.' % upload_mode)
    #I tell the manager that I'm done and I'm exiting
    q_life.put(['UPLOAD DONE'])
    logger.warning(multiprocessing.current_process().name + ' (upload worker) job finished: exiting')
    local_logger.warning(multiprocessing.current_process().name + ' job finished: exiting')
    return
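

# A minimal sketch (not part of the original module) of how a manager process
# might wire these upload workers together; the helper name, the worker count
# and the queue payloads are illustrative assumptions. Workers pull
# ('group_name', filepath) pairs from q_uplfile, stop when they read the
# ('WORKERS DONE',) sentinel, and report back on q_life before exiting.
def start_upload_workers(num_workers, extraction_directory, extraction_name, upload_mode):
    """Hypothetical driver: spawn upload workers sharing the queues and locks."""
    q_uplfile = multiprocessing.Queue()  #('group_name', filepath) items to upload
    q_life = multiprocessing.Queue()     #workers put ['UPLOAD DONE'] here when exiting
    lock_stdout = multiprocessing.Lock()
    lock_donefiles = multiprocessing.Lock()
    workers = []
    for _ in range(num_workers):
        worker = multiprocessing.Process(target=upload_process,
                                         args=(q_uplfile, lock_stdout, lock_donefiles, q_life,
                                               extraction_directory, extraction_name, upload_mode))
        worker.start()
        workers.append(worker)
    #once the extraction workers are done, the manager would enqueue one
    #sentinel per upload worker: q_uplfile.put(('WORKERS DONE',))
    return q_uplfile, q_life, workers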


def merge_bibcodes_and_upload(bibcodes):
    """Function that extracts, merges and uploads a bunch of bibcodes."""
    logger.setLevel(logging.WARNING)
    merged_records = merge_bibcodes(bibcodes)
    bibupload_merger(merged_records, logger)
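

# A minimal usage sketch (the bibcodes are hypothetical): this entry point is
# convenient for re-merging and re-uploading a handful of records by hand.
if __name__ == '__main__':
    merge_bibcodes_and_upload(['2001test.........1A', '2001test.........2B'])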