Example #1
0
def manage_addOAIHarvester(self, id='', REQUEST=None, **kwargs):
    """ Add an OAIHarvester to this container

    Arguments:
    id -- id of the new harvester; when it is the empty string the id is
          the lower-cased result of processId() on the submitted 'title'
          (default 'zharvester')
    REQUEST -- optional request; when given, its form supplies the field
               values, otherwise the keyword arguments do

    Any exception raised while the harvester is built aborts the current
    transaction. With a REQUEST the response is then redirected to
    manage_main carrying the error text as manage_tabs_message; without a
    REQUEST the exception is re-raised.

    """
    source = REQUEST.form if REQUEST is not None else kwargs
    form_data = dict(source)
    try:
        if id == '':
            title = form_data.get('title', 'zharvester')
            id = processId(title).lower()
        harvester = create_object(self, OAIHarvester, id)
        process_form(harvester, IOAIHarvester, form_data)
        harvester.update_sets()
    except Exception as e:
        transaction.abort()
        if REQUEST is None:
            raise
        # Interactive call: report the failure on the management screen
        base_url = self.absolute_url()
        error_text = quote(str(e))
        return REQUEST.RESPONSE.redirect(
            base_url + '/manage_main?manage_tabs_message=%s' % error_text)
Example #2
0
    def update_zcatalog(self, records_reponse):
        """ Fetch data from an OAI Server and populate the repository's catalog
        Highly inefficient in terms of storage. It is not recommended to use
        ZCatalog storage.

        Every valid record is looked up in the catalog (and created when
        missing), its metadata is copied onto dc_* attributes and the object
        is reindexed. Whenever the resumption token changes it is stored on
        the harvester and the transaction is committed. Afterwards every
        catalogued OAIRecord of this harvester that was not part of the
        response is deleted.

        Arguments:
        records_reponse -- <record> Generator yielding
                           ((header, metadata, about), resumption_token)

        """
        import logging  # local import: only used to report failed commits

        # A set keeps the membership test of the cleanup step cheap
        visited_records = set()
        catalog = self.aq_parent.getCatalog()
        for (header, meta, about), token in records_reponse:
            if not header or not meta: #Invalid record, passing through
                continue

            record_id = processId(header.identifier())
            catalog_record = catalog.searchResults(
                meta_type=OAIRecord.meta_type, id=record_id)
            visited_records.add(record_id)

            if len(catalog_record):
                record = catalog_record[0].getObject()
            else:
                manage_addOAIRecord(self, id=record_id,
                                    deleted=header.isDeleted(),
                                    about=unicode(about))
                record = self._getOb(record_id)
                record.harvester = self.id

            #Storing some fields for ZCatalog
            for key, val in meta.getMap().iteritems():
                if key == 'identifier' and val:
                    for ident in val:
                        if url_pattern.match(ident): #Storing only the url
                            record.dc_identifier = ident
                            break
                    else:
                        # No value looked like a url: keep the first one
                        record.dc_identifier = val[0]
                elif key in ('description', 'type', 'author') and val:
                    setattr(record, 'dc_' + key, "\n".join(val))
                elif key == 'language' and val:
                    languages = []
                    for lang in val:
                        try:
                            languages.append(
                                pycountry.languages.get(
                                    alpha2=str(lang).lower()
                                ).alpha2
                            )
                        except KeyError: #Unknown language code, skipping
                            continue
                    # Assign once, after the list is complete
                    record.dc_language = languages
                else:
                    # Any other key, or a listed key with an empty value
                    setattr(record, 'dc_' + key, val)

            record.header = header
            record.metadata = meta
            record.reindex_object()

            if self.resume_token != token:
                self.resume_token = token
                try:
                    transaction.commit()
                except Exception:
                    # Best effort: keep harvesting, but leave a trace
                    logging.exception(
                        'Could not commit after resumption token change')

        #Deleting unaffected OAIRecords
        # Collect the ids first so the catalog result is not walked while
        # objects are being removed from it.
        stale_ids = [
            brain.id for brain in self.getCatalog().searchResults(
                meta_type=OAIRecord.meta_type, harvester=self.id)
            if brain.id not in visited_records
        ]
        for stale_id in stale_ids:
            self.manage_delObjects(stale_id)
Example #3
0
    def update_sqlalchemy(self, records_reponse):
        """ The recommended way to store OAI Records from remote servers

        For every valid record the flattened metadata is compared, through
        ListDictDiffer, with the rows already stored for it. Added rows are
        queued for insertion and removed rows for deletion; dc_title and
        dc_description rows use the "full" table, all others the map table.
        The queues are written in bulk each time the resumption token
        changes and once more after the last record, so rows queued after
        the final token change are not lost.

        Arguments:
        records_reponse -- <record> Generator yielding
                           ((header, metadata, about), resumption_token)

        An XMLSyntaxError raised while iterating is logged and ends the
        update; rows still queued at that point are not written.

        """
        session = self.aq_parent.get_session()
        # Rows / ids waiting for the next bulk statement
        pending = {
            'records': [],
            'map_insert': [],
            'full_insert': [],
            'map_delete': [],
            'full_delete': [],
        }
        # Keys stored in records_map_full_table instead of records_map_table
        full_table_keys = ('dc_title', 'dc_description')

        try:
            for (header, meta, about), token in records_reponse:
                if not header or not meta: #Invalid record, passing through
                    continue
                record_id = processId(header.identifier())
                stored = session.query(OAIRecordMapper).filter(
                    OAIRecordMapper.id == record_id).all()

                record_map = []
                if stored:
                    if header.isDeleted():
                        # Deleted upstream: remove right away, nothing to diff
                        session.delete(stored[0])
                        session.commit()
                        continue
                    #Get existing rows: map table first, then full table
                    map_rows = session.query(OAIRecordMapMapper).filter(
                        OAIRecordMapMapper.record_id == record_id).all()
                    full_rows = session.query(OAIRecordMapFullMapper).filter(
                        OAIRecordMapFullMapper.record_id == record_id).all()
                    for map_item in list(map_rows) + list(full_rows):
                        record_map.append({
                            'id': map_item.id,
                            'lang': map_item.lang,
                            'record_id': map_item.record_id,
                            'key': map_item.key,
                            'value': map_item.value
                        })
                else:
                    pending['records'].append({'id': record_id,
                                               'harvester': self.id})

                new_record_map = []
                #Get the request metadata data dict and make it flat
                for key, values in meta.getMap().iteritems():
                    if ':' in key:
                        key, lang = key.split(':')
                    else:
                        lang = None

                    map_dict = {
                        'record_id': record_id,
                        'lang': lang,
                        'key': unicode('dc_' + key),
                    }
                    for value in values:
                        if not value:
                            continue
                        if key == 'identifier':
                            #Storing only the url
                            if url_pattern.match(value):
                                map_dict.update({'value': value})
                                new_record_map.append(dict(map_dict))
                                break
                        elif key == 'language':
                            try:
                                country_code = pycountry.languages.get(
                                    alpha2=str(value).lower()
                                ).alpha2
                                map_dict.update({'value': country_code})
                                new_record_map.append(dict(map_dict))
                            except Exception:
                                # Unusable language value, skipping
                                continue
                        else:
                            map_dict.update({'value': value})
                            new_record_map.append(dict(map_dict))

                diff = ListDictDiffer(new_record_map, record_map, ('id', ))

                #Insert list
                for map_dict in diff.added():
                    if map_dict['key'] in full_table_keys:
                        pending['full_insert'].append(map_dict)
                    else:
                        pending['map_insert'].append(map_dict)

                #Delete list - write some tests for this
                for list_index in diff.removed_index():
                    map_dict = record_map[list_index]
                    if map_dict['key'] in full_table_keys:
                        pending['full_delete'].append(map_dict['id'])
                    else:
                        pending['map_delete'].append(map_dict['id'])

                #Resumption token changed -> Make a bulk insert/delete
                if self.resume_token != token:
                    self.resume_token = token
                    try:
                        transaction.commit()
                    except Exception:
                        # Best effort: keep harvesting, but leave a trace
                        logging.exception(
                            'Could not commit after resumption token change')
                    self._flush_sqlalchemy_pending(session, pending)

        except XMLSyntaxError as e:
            logging.error('Update failed with: %s' % str(e))
        else:
            # Write whatever was queued after the last token change
            self._flush_sqlalchemy_pending(session, pending)

    def _flush_sqlalchemy_pending(self, session, pending):
        """ Run the queued bulk inserts and deletes, then empty the queues

        Arguments:
        session -- session whose bind executes the statements
        pending -- dict of queues built by update_sqlalchemy; every queue
                   that was written is replaced by a new empty list

        """
        # Same order as before: records, map rows, full rows, then deletes
        inserts = (
            ('records', 'records_table'),
            ('map_insert', 'records_map_table'),
            ('full_insert', 'records_map_full_table'),
        )
        for queue_name, table_name in inserts:
            rows = pending[queue_name]
            if rows:
                session.bind.execute(
                    sqlalchemy_setup.tables[table_name].insert(), rows)
                pending[queue_name] = []

        deletes = (
            ('map_delete', 'records_map_table'),
            ('full_delete', 'records_map_full_table'),
        )
        for queue_name, table_name in deletes:
            row_ids = pending[queue_name]
            if row_ids:
                table = sqlalchemy_setup.tables[table_name]
                session.bind.execute(
                    table.delete().where(table.columns.id.in_(row_ids)))
                pending[queue_name] = []