Example #1
0
 def do_collection_level_metadata(self):
     """Request and store the collection-level files missing from the alias dir.

     Creates ``self.alias_dir`` when it does not exist and lists it once.
     For each of ``Collection_TotalRecs.xml``, ``Collection_Metadata.xml``,
     ``Collection_Fields.json`` and ``Collection_Fields.xml`` that is absent
     from that listing, the matching ``CdmAPI.retrieve_*`` call is made for
     ``self.alias``, its result is handed to ``CdmAPI.write_xml_to_file`` /
     ``CdmAPI.write_json_to_file`` together with the directory and the file
     stem, and an INFO message is logged.
     """
     target_dir = self.alias_dir
     os.makedirs(target_dir, exist_ok=True)
     # A single listing serves all four presence checks below.
     already_there = set(os.listdir(target_dir))

     if 'Collection_TotalRecs.xml' not in already_there:
         total_recs = CdmAPI.retrieve_collection_total_recs(self.alias)
         CdmAPI.write_xml_to_file(total_recs, target_dir,
                                  'Collection_TotalRecs')
         logging.info(
             '{} Collection_TotalRecs.xml written'.format(self.alias))

     if 'Collection_Metadata.xml' not in already_there:
         collection_meta = CdmAPI.retrieve_collection_metadata(self.alias)
         CdmAPI.write_xml_to_file(collection_meta, target_dir,
                                  'Collection_Metadata')
         logging.info(
             '{} Collection_Metadata.xml written'.format(self.alias))

     if 'Collection_Fields.json' not in already_there:
         fields_json = CdmAPI.retrieve_collection_fields_json(self.alias)
         CdmAPI.write_json_to_file(fields_json, target_dir,
                                   'Collection_Fields')
         logging.info(
             '{} Collection_Fields.json written'.format(self.alias))

     if 'Collection_Fields.xml' not in already_there:
         fields_xml = CdmAPI.retrieve_collection_fields_xml(self.alias)
         CdmAPI.write_xml_to_file(fields_xml, target_dir,
                                  'Collection_Fields')
         logging.info('{} Collection_Fields.xml written'.format(self.alias))
Example #2
0
 def write_chunk_of_elems_in_collection(self, starting_position, chunksize):
     """Request and store one chunk of the collection's element listing.

     Lists ``self.alias_dir`` once.  For each of the JSON and XML variants
     of ``Elems_in_Collection_<starting_position>`` that is absent from the
     listing, ``CdmAPI.retrieve_elems_in_collection`` is called for
     ``self.alias`` and its result is handed to the matching
     ``CdmAPI.write_*_to_file`` call.

     Args:
         starting_position: Forwarded to the retrieve call and used as the
             suffix of the file stem.
         chunksize: Forwarded to the retrieve call unchanged.

     Raises:
         OSError: If ``self.alias_dir`` cannot be listed (for example, it
             does not exist); this method does not create it.
     """
     filepath = self.alias_dir
     existing = set(os.listdir(filepath))
     basename = 'Elems_in_Collection_{}'.format(starting_position)

     if '{}.json'.format(basename) not in existing:
         CdmAPI.write_json_to_file(
             CdmAPI.retrieve_elems_in_collection(
                 self.alias, starting_position, chunksize, 'json'),
             filepath, basename)
     if '{}.xml'.format(basename) not in existing:
         # The alias must come from the instance: no local or parameter
         # named ``alias`` exists in this method.
         CdmAPI.write_xml_to_file(
             CdmAPI.retrieve_elems_in_collection(
                 self.alias, starting_position, chunksize, 'xml'),
             filepath, basename)
Example #3
0
    def write_metadata(self, target_dir, pointer, simple_or_cpd):
        """Request and store the metadata files for ``pointer`` that are missing.

        The expected files are ``<pointer>.xml``, ``<pointer>.json``,
        ``<pointer>_parent.xml``, ``<pointer>_parent.json`` and, when
        ``simple_or_cpd`` is ``'cpd'``, ``<pointer>_cpd.xml``.  For each one
        not listed under ``target_dir`` in ``self.tree_snapshot`` the text is
        requested from ``CdmAPI``.  A response that the ``is_it_a_404_*``
        check flags adds ``pointer`` to ``self.unavailable_metadata``; any
        other response is handed to the matching ``CdmAPI.write_*_to_file``
        call and a confirmation line is printed.

        Args:
            target_dir: Directory whose entry in the snapshot is consulted
                and which is passed to the writer calls.
            pointer: Item identifier; forwarded to ``CdmAPI`` and used as the
                stem of every file name.
            simple_or_cpd: When equal to ``'cpd'`` the compound-object index
                file is handled as well.
        """
        # checks presence of file before calling to contentDM or overwriting file
        # there can be up to 4000 files checked here per alias,
        # so it is useful to take a snapshot of the directory tree beforehand,
        # instead of reading from the harddrive for each file.
        present = set()
        for root, _dirs, names in self.tree_snapshot:
            if target_dir == root:
                present.update(names)

        if "{}.xml".format(pointer) not in present:
            xml_text = CdmAPI.retrieve_item_metadata(self.alias, pointer, 'xml')
            if is_it_a_404_xml(xml_text):
                self.unavailable_metadata.add(pointer)
            else:
                CdmAPI.write_xml_to_file(xml_text, target_dir, pointer)
                print(self.alias, pointer, 'wrote xml_text')

        if '{}.json'.format(pointer) not in present:
            json_text = CdmAPI.retrieve_item_metadata(self.alias, pointer, 'json')
            if is_it_a_404_json(json_text):
                self.unavailable_metadata.add(pointer)
            else:
                CdmAPI.write_json_to_file(json_text, target_dir, pointer)
                # Report success only when the file was actually written.
                print(self.alias, pointer, 'wrote json_text')

        if '{}_parent.xml'.format(pointer) not in present:
            xml_parent_text = CdmAPI.retrieve_parent_info(self.alias, pointer, 'xml')
            if is_it_a_404_xml(xml_parent_text):
                self.unavailable_metadata.add(pointer)
            else:
                CdmAPI.write_xml_to_file(xml_parent_text, target_dir, '{}_parent'.format(pointer))
                print(self.alias, pointer, 'wrote xml_parent_text')

        if '{}_parent.json'.format(pointer) not in present:
            json_parent_text = CdmAPI.retrieve_parent_info(self.alias, pointer, 'json')
            if is_it_a_404_json(json_parent_text):
                self.unavailable_metadata.add(pointer)
            else:
                CdmAPI.write_json_to_file(json_parent_text, target_dir, '{}_parent'.format(pointer))
                print(self.alias, pointer, 'wrote json_parent_text')

        if simple_or_cpd == 'cpd':
            if '{}_cpd.xml'.format(pointer) not in present:
                index_file_text = CdmAPI.retrieve_compound_object(self.alias, pointer)
                if is_it_a_404_xml(index_file_text):
                    self.unavailable_metadata.add(pointer)
                else:
                    CdmAPI.write_xml_to_file(index_file_text, target_dir, '{}_cpd'.format(pointer))
                    print(self.alias, pointer, 'wrote xml_index_file_text')
0
 def write_chunk_of_elems_in_collection(self, starting_position, chunksize):
     """Request and store one chunk of the collection's element listing.

     Lists ``self.alias_dir`` once.  For each of the JSON and XML variants
     of ``Elems_in_Collection_<starting_position>`` that is absent from the
     listing, ``CdmAPI.retrieve_elems_in_collection`` is called for
     ``self.alias``, its result is handed to the matching
     ``CdmAPI.write_*_to_file`` call, and an INFO message is logged.

     Args:
         starting_position: Forwarded to the retrieve call and used as the
             suffix of the file stem.
         chunksize: Forwarded to the retrieve call unchanged.

     Raises:
         OSError: If ``self.alias_dir`` cannot be listed (for example, it
             does not exist); this method does not create it.
     """
     filepath = self.alias_dir
     existing = set(os.listdir(filepath))
     basename = 'Elems_in_Collection_{}'.format(starting_position)

     if '{}.json'.format(basename) not in existing:
         CdmAPI.write_json_to_file(
             CdmAPI.retrieve_elems_in_collection(
                 self.alias, starting_position, chunksize, 'json'),
             filepath, basename)
         logging.info('{} Elems_in_Collection_{}.json written'.format(
             self.alias, starting_position))
     if '{}.xml'.format(basename) not in existing:
         # The alias must come from the instance: no local or parameter
         # named ``alias`` exists in this method.
         CdmAPI.write_xml_to_file(
             CdmAPI.retrieve_elems_in_collection(
                 self.alias, starting_position, chunksize, 'xml'),
             filepath, basename)
         logging.info('{} Elems_in_Collection_{}.xml written'.format(
             self.alias, starting_position))
Example #5
0
 def do_collection_level_metadata(self):
     """Request and store the collection-level files missing from the alias dir.

     Creates ``self.alias_dir`` when it does not exist and lists it once.
     For each of ``Collection_TotalRecs.xml``, ``Collection_Metadata.xml``,
     ``Collection_Fields.json`` and ``Collection_Fields.xml`` that is absent
     from that listing, the matching ``CdmAPI.retrieve_*`` call is made for
     ``self.alias`` and its result is handed to ``CdmAPI.write_xml_to_file``
     / ``CdmAPI.write_json_to_file`` together with the directory and the
     file stem.
     """
     out_dir = self.alias_dir
     os.makedirs(out_dir, exist_ok=True)
     # A single listing serves all four presence checks below.
     on_disk = set(os.listdir(out_dir))

     if 'Collection_TotalRecs.xml' not in on_disk:
         total_recs = CdmAPI.retrieve_collection_total_recs(self.alias)
         CdmAPI.write_xml_to_file(total_recs, out_dir, 'Collection_TotalRecs')

     if 'Collection_Metadata.xml' not in on_disk:
         collection_meta = CdmAPI.retrieve_collection_metadata(self.alias)
         CdmAPI.write_xml_to_file(collection_meta, out_dir,
                                  'Collection_Metadata')

     if 'Collection_Fields.json' not in on_disk:
         fields_json = CdmAPI.retrieve_collection_fields_json(self.alias)
         CdmAPI.write_json_to_file(fields_json, out_dir, 'Collection_Fields')

     if 'Collection_Fields.xml' not in on_disk:
         fields_xml = CdmAPI.retrieve_collection_fields_xml(self.alias)
         CdmAPI.write_xml_to_file(fields_xml, out_dir, 'Collection_Fields')
Example #6
0
    def write_metadata(self, target_dir, pointer, simple_or_cpd):
        """Request and store the metadata files for ``pointer`` that are missing.

        The expected files are ``<pointer>.xml``, ``<pointer>.json``,
        ``<pointer>_parent.xml``, ``<pointer>_parent.json`` and, when
        ``simple_or_cpd`` is ``'cpd'``, ``<pointer>_cpd.xml``.  For each one
        not listed under ``target_dir`` in ``self.tree_snapshot`` the text is
        requested from ``CdmAPI``.  A response that the ``is_it_a_404_*``
        check flags is logged as a warning; any other response is handed to
        the matching ``CdmAPI.write_*_to_file`` call and logged at INFO.

        Args:
            target_dir: Directory whose entry in the snapshot is consulted
                and which is passed to the writer calls.
            pointer: Item identifier; forwarded to ``CdmAPI`` and used as the
                stem of every file name.
            simple_or_cpd: When equal to ``'cpd'`` the compound-object index
                file is handled as well.
        """
        # The presence checks run against a snapshot of the directory tree
        # taken beforehand rather than against the disk: there can be up to
        # 4000 files checked per alias, and nothing is requested from
        # contentDM or overwritten when the file is already listed.
        on_disk = set()
        for root, _subdirs, names in self.tree_snapshot:
            if target_dir == root:
                on_disk.update(names)

        if "{}.xml".format(pointer) not in on_disk:
            item_xml = CdmAPI.retrieve_item_metadata(self.alias, pointer, 'xml')
            if not is_it_a_404_xml(item_xml):
                CdmAPI.write_xml_to_file(item_xml, target_dir, pointer)
                logging.info(
                    '{} {} xml_text written'.format(self.alias, pointer))
            else:
                logging.warning('{} {}.xml is 404'.format(self.alias, pointer))

        if '{}.json'.format(pointer) not in on_disk:
            item_json = CdmAPI.retrieve_item_metadata(self.alias, pointer, 'json')
            if not is_it_a_404_json(item_json):
                CdmAPI.write_json_to_file(item_json, target_dir, pointer)
                logging.info(
                    '{} {} json_text written'.format(self.alias, pointer))
            else:
                logging.warning(
                    '{} {}.json is 404'.format(self.alias, pointer))

        parent_stem = '{}_parent'.format(pointer)

        if '{}_parent.xml'.format(pointer) not in on_disk:
            parent_xml = CdmAPI.retrieve_parent_info(self.alias, pointer, 'xml')
            if not is_it_a_404_xml(parent_xml):
                CdmAPI.write_xml_to_file(parent_xml, target_dir, parent_stem)
                logging.info(
                    '{} {} xml_parent_text written'.format(self.alias, pointer))
            else:
                logging.warning(
                    '{} {}_parent.xml is 404'.format(self.alias, pointer))

        if '{}_parent.json'.format(pointer) not in on_disk:
            parent_json = CdmAPI.retrieve_parent_info(self.alias, pointer, 'json')
            if not is_it_a_404_json(parent_json):
                CdmAPI.write_json_to_file(parent_json, target_dir, parent_stem)
                logging.info(
                    '{} {} json_parent_text written'.format(self.alias, pointer))
            else:
                logging.warning(
                    '{} {}_parent.json is 404'.format(self.alias, pointer))

        # The compound-object index file only applies to 'cpd' items.
        if simple_or_cpd == 'cpd' and '{}_cpd.xml'.format(pointer) not in on_disk:
            cpd_index = CdmAPI.retrieve_compound_object(self.alias, pointer)
            if not is_it_a_404_xml(cpd_index):
                CdmAPI.write_xml_to_file(cpd_index, target_dir,
                                         '{}_cpd'.format(pointer))
                logging.info(
                    '{} {} xml_index_file_text written'.format(
                        self.alias, pointer))
            else:
                logging.warning(
                    '{} {}_cpd.xml is 404'.format(self.alias, pointer))