def write_chunk_of_elems_in_collection(self, starting_position, chunksize):
    """Fetch one chunk of the collection's element listing and save it.

    For each of the two formats (json, xml) the chunk is requested from
    ``CdmAPI.retrieve_elems_in_collection`` and handed to the matching
    ``CdmAPI.write_*_to_file`` helper, unless a file named
    ``Elems_in_Collection_<starting_position>.<format>`` is already listed
    in ``self.alias_dir``.

    ``self.alias_dir`` must already exist: it is listed with ``os.listdir``
    and this method does not create it.

    Args:
        starting_position: Forwarded to the retrieval call and used as the
            suffix of the output file name.
        chunksize: Forwarded unchanged to the retrieval call.
    """
    filepath = self.alias_dir
    # One directory listing serves both membership checks below.
    existing = set(os.listdir(filepath))

    # The writers receive the name without an extension; the checks below
    # presumably rely on them appending '.json' / '.xml' -- confirm in CdmAPI.
    if 'Elems_in_Collection_{}.json'.format(starting_position) not in existing:
        CdmAPI.write_json_to_file(
            CdmAPI.retrieve_elems_in_collection(
                self.alias, starting_position, chunksize, 'json'),
            filepath,
            'Elems_in_Collection_{}'.format(starting_position))

    if 'Elems_in_Collection_{}.xml'.format(starting_position) not in existing:
        # Must be self.alias: a bare ``alias`` is not defined in this scope
        # and made the XML request fail with NameError.
        CdmAPI.write_xml_to_file(
            CdmAPI.retrieve_elems_in_collection(
                self.alias, starting_position, chunksize, 'xml'),
            filepath,
            'Elems_in_Collection_{}'.format(starting_position))
def write_chunk_of_elems_in_collection(self, starting_position, chunksize):
    """Fetch one chunk of the collection's element listing and save it.

    For each of the two formats (json, xml) the chunk is requested from
    ``CdmAPI.retrieve_elems_in_collection`` and handed to the matching
    ``CdmAPI.write_*_to_file`` helper, unless a file named
    ``Elems_in_Collection_<starting_position>.<format>`` is already listed
    in ``self.alias_dir``.  Every file that gets written is reported with
    ``logging.info``.

    ``self.alias_dir`` must already exist: it is listed with ``os.listdir``
    and this method does not create it.

    Args:
        starting_position: Forwarded to the retrieval call and used as the
            suffix of the output file name.
        chunksize: Forwarded unchanged to the retrieval call.
    """
    filepath = self.alias_dir
    # One directory listing serves both membership checks below.
    existing = set(os.listdir(filepath))

    # The writers receive the name without an extension; the checks below
    # presumably rely on them appending '.json' / '.xml' -- confirm in CdmAPI.
    if 'Elems_in_Collection_{}.json'.format(starting_position) not in existing:
        CdmAPI.write_json_to_file(
            CdmAPI.retrieve_elems_in_collection(
                self.alias, starting_position, chunksize, 'json'),
            filepath,
            'Elems_in_Collection_{}'.format(starting_position))
        logging.info('{} Elems_in_Collection_{}.json written'.format(
            self.alias, starting_position))

    if 'Elems_in_Collection_{}.xml'.format(starting_position) not in existing:
        # Must be self.alias: a bare ``alias`` is not defined in this scope
        # and made the XML request fail with NameError.
        CdmAPI.write_xml_to_file(
            CdmAPI.retrieve_elems_in_collection(
                self.alias, starting_position, chunksize, 'xml'),
            filepath,
            'Elems_in_Collection_{}'.format(starting_position))
        logging.info('{} Elems_in_Collection_{}.xml written'.format(
            self.alias, starting_position))
def do_collection_level_metadata(self):
    """Save the collection-level files for this alias into ``self.alias_dir``.

    The directory is created when missing, then listed once.  Each of
    ``Collection_TotalRecs.xml``, ``Collection_Metadata.xml``,
    ``Collection_Fields.json`` and ``Collection_Fields.xml`` that is absent
    from that listing is retrieved through ``CdmAPI`` and passed to the
    matching ``CdmAPI.write_*_to_file`` helper, followed by a
    ``logging.info`` line.  Files already listed are left untouched.
    """
    out_dir = self.alias_dir
    os.makedirs(out_dir, exist_ok=True)
    present = os.listdir(out_dir)

    def absent(name):
        # True when the directory listing taken above lacks *name*.
        return name not in present

    if absent('Collection_TotalRecs.xml'):
        CdmAPI.write_xml_to_file(
            CdmAPI.retrieve_collection_total_recs(self.alias),
            out_dir,
            'Collection_TotalRecs')
        logging.info('{} Collection_TotalRecs.xml written'.format(
            self.alias))

    if absent('Collection_Metadata.xml'):
        CdmAPI.write_xml_to_file(
            CdmAPI.retrieve_collection_metadata(self.alias),
            out_dir,
            'Collection_Metadata')
        logging.info('{} Collection_Metadata.xml written'.format(
            self.alias))

    if absent('Collection_Fields.json'):
        CdmAPI.write_json_to_file(
            CdmAPI.retrieve_collection_fields_json(self.alias),
            out_dir,
            'Collection_Fields')
        logging.info('{} Collection_Fields.json written'.format(
            self.alias))

    if absent('Collection_Fields.xml'):
        CdmAPI.write_xml_to_file(
            CdmAPI.retrieve_collection_fields_xml(self.alias),
            out_dir,
            'Collection_Fields')
        logging.info('{} Collection_Fields.xml written'.format(self.alias))
def write_metadata(self, target_dir, pointer, simple_or_cpd):
    """Fetch and store the metadata files for *pointer* that are missing.

    Presence is decided from ``self.tree_snapshot`` instead of the
    filesystem: there can be up to 4000 files checked per alias, so a
    snapshot of the directory tree taken beforehand is consulted rather than
    reading from the hard drive for each file.  A file the snapshot lists
    under *target_dir* is neither requested from contentDM nor overwritten.

    For every missing file the text is retrieved through ``CdmAPI``.  When
    the matching ``is_it_a_404_*`` check is true, *pointer* is added to
    ``self.unavailable_metadata``; otherwise the text is written and a line
    is printed.

    Args:
        target_dir: Directory the files belong in; compared against the
            ``root`` entries of the snapshot.
        pointer: Item identifier, used in the CdmAPI calls and file names.
        simple_or_cpd: The compound-object index (``<pointer>_cpd.xml``) is
            only handled when this equals ``'cpd'``.
    """
    on_disk = [
        name
        for root, _dirs, names in self.tree_snapshot
        for name in names
        if target_dir == root
    ]

    # Item-level metadata, XML flavour.
    if "{}.xml".format(pointer) not in on_disk:
        item_xml = CdmAPI.retrieve_item_metadata(self.alias, pointer, 'xml')
        if not is_it_a_404_xml(item_xml):
            CdmAPI.write_xml_to_file(item_xml, target_dir, pointer)
            print(self.alias, pointer, 'wrote xml_text')
        else:
            self.unavailable_metadata.add(pointer)

    # Item-level metadata, JSON flavour.
    if '{}.json'.format(pointer) not in on_disk:
        item_json = CdmAPI.retrieve_item_metadata(self.alias, pointer, 'json')
        if not is_it_a_404_json(item_json):
            CdmAPI.write_json_to_file(item_json, target_dir, pointer)
            print(self.alias, pointer, 'wrote json_text')
        else:
            self.unavailable_metadata.add(pointer)

    # Parent info, XML flavour.
    if '{}_parent.xml'.format(pointer) not in on_disk:
        parent_xml = CdmAPI.retrieve_parent_info(self.alias, pointer, 'xml')
        if not is_it_a_404_xml(parent_xml):
            CdmAPI.write_xml_to_file(
                parent_xml, target_dir, '{}_parent'.format(pointer))
            print(self.alias, pointer, 'wrote xml_parent_text')
        else:
            self.unavailable_metadata.add(pointer)

    # Parent info, JSON flavour.
    if '{}_parent.json'.format(pointer) not in on_disk:
        parent_json = CdmAPI.retrieve_parent_info(self.alias, pointer, 'json')
        if not is_it_a_404_json(parent_json):
            CdmAPI.write_json_to_file(
                parent_json, target_dir, '{}_parent'.format(pointer))
            print(self.alias, pointer, 'wrote json_parent_text')
        else:
            self.unavailable_metadata.add(pointer)

    # The compound-object index only applies to 'cpd' items.
    if simple_or_cpd != 'cpd':
        return
    if '{}_cpd.xml'.format(pointer) in on_disk:
        return
    cpd_index = CdmAPI.retrieve_compound_object(self.alias, pointer)
    if not is_it_a_404_xml(cpd_index):
        CdmAPI.write_xml_to_file(
            cpd_index, target_dir, '{}_cpd'.format(pointer))
        print(self.alias, pointer, 'wrote xml_index_file_text')
    else:
        self.unavailable_metadata.add(pointer)
def do_collection_level_metadata(self):
    """Save the collection-level files for this alias into ``self.alias_dir``.

    The directory is created when missing, then listed once.  Each of
    ``Collection_TotalRecs.xml``, ``Collection_Metadata.xml``,
    ``Collection_Fields.json`` and ``Collection_Fields.xml`` that is absent
    from that listing is retrieved through ``CdmAPI`` and passed to the
    matching ``CdmAPI.write_*_to_file`` helper.  Files already listed are
    left untouched.
    """
    out_dir = self.alias_dir
    os.makedirs(out_dir, exist_ok=True)
    present = os.listdir(out_dir)

    def absent(name):
        # True when the directory listing taken above lacks *name*.
        return name not in present

    if absent('Collection_TotalRecs.xml'):
        CdmAPI.write_xml_to_file(
            CdmAPI.retrieve_collection_total_recs(self.alias),
            out_dir,
            'Collection_TotalRecs')

    if absent('Collection_Metadata.xml'):
        CdmAPI.write_xml_to_file(
            CdmAPI.retrieve_collection_metadata(self.alias),
            out_dir,
            'Collection_Metadata')

    if absent('Collection_Fields.json'):
        CdmAPI.write_json_to_file(
            CdmAPI.retrieve_collection_fields_json(self.alias),
            out_dir,
            'Collection_Fields')

    if absent('Collection_Fields.xml'):
        CdmAPI.write_xml_to_file(
            CdmAPI.retrieve_collection_fields_xml(self.alias),
            out_dir,
            'Collection_Fields')
def do_repo_level_objects(repo_dir):
    """Make sure the repository-wide collections list is saved in *repo_dir*.

    Creates *repo_dir* (including parents) when it is missing.  If
    ``Collections_List.xml`` is not already a file there, the list is
    retrieved with ``CdmAPI.retrieve_collections_list`` and handed to
    ``CdmAPI.write_xml_to_file``; otherwise nothing is fetched or written.

    Args:
        repo_dir: Directory that holds the repository-level files.
    """
    os.makedirs(repo_dir, exist_ok=True)
    if os.path.isfile(os.path.join(repo_dir, 'Collections_List.xml')):
        return

    coll_list_txt = CdmAPI.retrieve_collections_list()
    CdmAPI.write_xml_to_file(coll_list_txt, repo_dir, 'Collections_List')
    # The message used to say 'Collection_List.xml', which is not the name
    # of the file checked for and written above.
    logging.info('Collections_List.xml written')
def write_metadata(self, target_dir, pointer, simple_or_cpd):
    """Fetch and store the metadata files for *pointer* that are missing.

    Presence is decided from ``self.tree_snapshot`` instead of the
    filesystem: there can be up to 4000 files checked per alias, so a
    snapshot of the directory tree taken beforehand is consulted rather than
    reading from the hard drive for each file.  A file the snapshot lists
    under *target_dir* is neither requested from contentDM nor overwritten.

    For every missing file the text is retrieved through ``CdmAPI``.  When
    the matching ``is_it_a_404_*`` check is true, a warning is logged and
    nothing is written; otherwise the text is written and an info line is
    logged.

    Args:
        target_dir: Directory the files belong in; compared against the
            ``root`` entries of the snapshot.
        pointer: Item identifier, used in the CdmAPI calls and file names.
        simple_or_cpd: The compound-object index (``<pointer>_cpd.xml``) is
            only handled when this equals ``'cpd'``.
    """
    snapshot_names = [
        name
        for root, _dirs, names in self.tree_snapshot
        for name in names
        if target_dir == root
    ]

    # Item-level metadata, XML flavour.
    if "{}.xml".format(pointer) not in snapshot_names:
        item_xml = CdmAPI.retrieve_item_metadata(self.alias, pointer, 'xml')
        if not is_it_a_404_xml(item_xml):
            CdmAPI.write_xml_to_file(item_xml, target_dir, pointer)
            logging.info('{} {} xml_text written'.format(
                self.alias, pointer))
        else:
            logging.warning('{} {}.xml is 404'.format(self.alias, pointer))

    # Item-level metadata, JSON flavour.
    if '{}.json'.format(pointer) not in snapshot_names:
        item_json = CdmAPI.retrieve_item_metadata(self.alias, pointer, 'json')
        if not is_it_a_404_json(item_json):
            CdmAPI.write_json_to_file(item_json, target_dir, pointer)
            logging.info('{} {} json_text written'.format(
                self.alias, pointer))
        else:
            logging.warning('{} {}.json is 404'.format(
                self.alias, pointer))

    # Parent info, XML flavour.
    if '{}_parent.xml'.format(pointer) not in snapshot_names:
        parent_xml = CdmAPI.retrieve_parent_info(self.alias, pointer, 'xml')
        if not is_it_a_404_xml(parent_xml):
            CdmAPI.write_xml_to_file(
                parent_xml, target_dir, '{}_parent'.format(pointer))
            logging.info('{} {} xml_parent_text written'.format(
                self.alias, pointer))
        else:
            logging.warning('{} {}_parent.xml is 404'.format(
                self.alias, pointer))

    # Parent info, JSON flavour.
    if '{}_parent.json'.format(pointer) not in snapshot_names:
        parent_json = CdmAPI.retrieve_parent_info(self.alias, pointer, 'json')
        if not is_it_a_404_json(parent_json):
            CdmAPI.write_json_to_file(
                parent_json, target_dir, '{}_parent'.format(pointer))
            logging.info('{} {} json_parent_text written'.format(
                self.alias, pointer))
        else:
            logging.warning('{} {}_parent.json is 404'.format(
                self.alias, pointer))

    # The compound-object index only applies to 'cpd' items.
    if simple_or_cpd != 'cpd':
        return
    if '{}_cpd.xml'.format(pointer) in snapshot_names:
        return
    cpd_index = CdmAPI.retrieve_compound_object(self.alias, pointer)
    if not is_it_a_404_xml(cpd_index):
        CdmAPI.write_xml_to_file(
            cpd_index, target_dir, '{}_cpd'.format(pointer))
        logging.info('{} {} xml_index_file_text written'.format(
            self.alias, pointer))
    else:
        logging.warning('{} {}_cpd.xml is 404'.format(
            self.alias, pointer))