def bootstrap_oai(self, endpoint):
    """Bootstrap the source from an OAI-PMH endpoint.

    Creates the OAI client for ``endpoint`` (stored on ``self.client``),
    lists all records starting at ``self.config['fromdate']`` and hands
    each one to ``self.process_record(record, init=True)``. After every
    record ``self.lastcheckdate`` is set to that record's response date.

    Reads the config keys ``fromdate``, ``limit``, ``checkurl`` and
    ``delay_time``. A ``URLError`` raised while talking to the endpoint is
    logged and swallowed; nothing is returned.
    """
    # TODO: update granularity
    startdate = self.config['fromdate']
    self.logger.debug("Connecting to OAI-Endpoint %s", endpoint)
    self.client = Client(endpoint, self.config['limit'],
                         self.config['checkurl'])
    try:
        no_records = 0
        records = self.client.listRecords(
            startdate, delay=self.config['delay_time'])
        for record in records:
            no_records += self.process_record(record, init=True)
            self.lastcheckdate = record.responseDate()
        # The count is doubled in the report -- presumably because every
        # record yields a resource plus its metadata resource (confirm
        # against process_record).
        self.logger.info(
            "Finished adding %d initial resources with checkdate: %s",
            no_records * 2, self.lastcheckdate)
    except URLError as e:
        self.logger.error("URLError: %s", e)
def bootstrap_oai(self, endpoint):
    """Bootstrap the source from an OAI-PMH endpoint.

    Builds a ``Client`` for ``endpoint`` and keeps it on ``self.client``,
    then walks the records listed from ``self.config['fromdate']`` onwards,
    passing each to ``self.process_record(record, init=True)`` and
    remembering the record's response date in ``self.lastcheckdate``.

    Config keys used: ``fromdate``, ``limit``, ``checkurl``,
    ``delay_time``. ``URLError`` is caught and logged rather than raised.
    """
    # TODO: update granularity
    config = self.config
    self.logger.debug("Connecting to OAI-Endpoint %s", endpoint)
    self.client = Client(endpoint, config['limit'], config['checkurl'])
    try:
        no_records = 0
        for record in self.client.listRecords(config['fromdate'],
                                              delay=config['delay_time']):
            no_records += self.process_record(record, init=True)
            self.lastcheckdate = record.responseDate()
        # Reported number is twice the processed count -- presumably one
        # resource and one metadata resource per record (confirm against
        # process_record).
        self.logger.info(
            "Finished adding %d initial resources with checkdate: %s",
            no_records * 2, self.lastcheckdate)
    except URLError as e:
        self.logger.error("URLError: %s", e)
class Source(Observable):
    """A source contains a list of resources and changes over time.

    Resources are kept in an in-memory repository keyed by basename (the
    resource URI). For OAI-backed resources a second "metadata" resource,
    addressed through a GetRecord URL on the OAI endpoint, is maintained
    alongside each primary resource, and ``oaimapping`` remembers which
    basename belongs to which OAI identifier.
    """

    RESOURCE_PATH = "/resources"
    STATIC_FILE_PATH = os.path.join(os.path.dirname(__file__), "static")
    TEMP_FILE_PATH = os.path.join(os.path.dirname(__file__), "temp")

    def __init__(self, config, hostname, port):
        """Initialize the source.

        ``config`` is a mapping; ``config['fromdate']`` must offer
        ``strftime`` and seeds ``lastcheckdate``.
        """
        super(Source, self).__init__()
        self.logger = logging.getLogger('source')
        self.config = config
        self.logger.info("Source config: %s " % self.config)
        self.hostname = hostname
        self.port = port
        self.max_res_id = 1
        self._repository = {}  # {basename: {'timestamp': timestamp}}
        self.inventory_builder = None  # The inventory builder implementation
        self.changememory = None  # The change memory implementation
        self.no_events = 0
        self.oaimapping = {}  # oai: identifier -> basename
        self.client = None  # oai: set by bootstrap_oai
        # Round-trip through a UTC-marked string so the parsed date carries
        # the trailing "Z". The format must include minutes: "%H:%S" would
        # be read back as hour:minute and corrupt the value.
        self.lastcheckdate = dateutil_parser.parse(
            config['fromdate'].strftime("%Y-%m-%d %H:%M:%SZ"))  # oai

    ##### Source capabilities #####

    def add_inventory_builder(self, inventory_builder):
        """Adds an inventory builder implementation"""
        self.inventory_builder = inventory_builder

    @property
    def has_inventory_builder(self):
        """Returns True if the Source has an inventory builder"""
        return self.inventory_builder is not None

    def add_changememory(self, changememory):
        """Adds a changememory implementation"""
        self.changememory = changememory

    @property
    def has_changememory(self):
        """Returns True if a source maintains a change memory"""
        return self.changememory is not None

    ##### Bootstrap Source ######

    def bootstrap(self):
        """Bootstrap the source with a set of resources"""
        self.logger.info("Bootstrapping source")
        if self.has_changememory:
            self.changememory.bootstrap()
        if self.has_inventory_builder:
            self.inventory_builder.bootstrap()
        self._log_stats()

    ##### Source data accessors #####

    @property
    def base_uri(self):
        """Returns the base URI of the source (e.g., http://localhost:8888)"""
        return "http://" + self.hostname + ":" + str(self.port)

    @property
    def resource_count(self):
        """The number of resources in the source's repository"""
        return len(self._repository)

    @property
    def resources(self):
        """Iterates over resources and yields resource objects"""
        # Iterate over a snapshot of the keys so the repository may change
        # while a consumer is still walking the generator.
        for basename in list(self._repository):
            resource = self.resource(basename)
            if resource is None:
                self.logger.error("Cannot create resource %s " % basename +
                                  "because source object has been deleted.")
            else:
                yield resource

    @property
    def random_resource(self):
        """Returns a single random resource, or None if there is none"""
        rand_res = self.random_resources()
        if len(rand_res) == 1:
            return rand_res[0]
        return None

    def resource(self, basename):
        """Creates and returns a resource object from the internal resource
        repository. Repository values are copied into the object.

        Returns None when ``basename`` is not in the repository.
        """
        if basename not in self._repository:
            return None
        timestamp = self._repository[basename]['timestamp']
        return Resource(uri=basename, timestamp=timestamp)

    def random_resources(self, number=1):
        """Return a random set of resources, at most all resources"""
        number = min(number, len(self._repository))
        # random.sample needs a sequence, not a dict key view.
        rand_basenames = random.sample(list(self._repository), number)
        return [self.resource(basename) for basename in rand_basenames]

    # Private Methods

    def _metadata_uri(self, identifier):
        """Return the GetRecord URL of the metadata resource of identifier."""
        return (self.client.endpoint +
                "?verb=GetRecord&metadataPrefix=oai_dc&identifier=" +
                identifier)

    def _create_resource(self, basename=None, identifier=None,
                         timestamp=None, notify_observers=True, oai=True):
        """Create a new resource, add it to the source, notify observers.

        ``timestamp`` defaults to the current time at call time. With
        ``oai`` set, the matching metadata resource is created as well and
        ``identifier`` is mapped to ``basename`` in ``oaimapping``.
        """
        if timestamp is None:
            # Resolved per call; a ``time.time()`` default in the signature
            # would be frozen at class-definition time.
            timestamp = time.time()
        self._repository[basename] = {'timestamp': timestamp}
        change = ResourceChange(resource=self.resource(basename),
                                changetype="CREATED")
        if notify_observers:
            self.notify_observers(change)
            self.logger.debug("Event: %s" % repr(change))
        # add metadata resource url
        if oai:
            self._create_resource(basename=self._metadata_uri(identifier),
                                  timestamp=timestamp,
                                  notify_observers=notify_observers,
                                  oai=False)
            self.oaimapping[identifier] = basename

    def _update_resource(self, basename, identifier, timestamp, oai=True):
        """Update a resource, notify observers.

        With ``oai`` set, the metadata resource of ``identifier`` is
        updated to the same timestamp as well.
        """
        self._repository[basename] = {'timestamp': timestamp}
        change = ResourceChange(resource=self.resource(basename),
                                changetype="UPDATED")
        self.notify_observers(change)
        self.logger.debug("Event: %s" % repr(change))
        # update metadata resource url
        if oai:
            self._update_resource(self._metadata_uri(identifier),
                                  identifier, timestamp, oai=False)

    def _delete_resource(self, identifier, timestamp, notify_observers=True,
                         oai=True):
        """Delete a given resource, notify observers.

        With ``oai`` set, the primary resource mapped to ``identifier`` is
        removed after its metadata resource; otherwise only the metadata
        resource is removed. Raises KeyError when the identifier or the
        resource is unknown.
        """
        if oai:
            basename = self.oaimapping.pop(identifier)
            # delete metadata resource url
            self._delete_resource(identifier, timestamp,
                                  notify_observers=notify_observers,
                                  oai=False)
        else:
            basename = self._metadata_uri(identifier)
        res = self.resource(basename)
        del self._repository[basename]
        # The emitted resource carries the deletion time, not the time of
        # its last modification.
        res.timestamp = timestamp
        if notify_observers:
            change = ResourceChange(resource=res, changetype="DELETED")
            self.notify_observers(change)
            self.logger.debug("Event: %s" % repr(change))

    def bootstrap_oai(self, endpoint):
        """Bootstrap the source from an OAI-PMH endpoint.

        Creates the OAI client for ``endpoint``, lists all records starting
        at ``config['fromdate']`` and processes each with ``init=True``.
        ``lastcheckdate`` follows the response date of the latest record.
        ``URLError`` and ``NoRecordsException`` are logged, not raised.
        """
        # TODO: update granularity
        startdate = self.config['fromdate']
        self.logger.debug("Connecting to OAI-Endpoint %s" % endpoint)
        self.client = Client(endpoint, self.config['limit'],
                             self.config['checkurl'])
        try:
            no_records = 0
            records = self.client.listRecords(
                startdate, delay=self.config['delay_time'])
            for record in records:
                no_records += self.process_record(record, init=True)
                self.lastcheckdate = record.responseDate()
            # Doubled in the report -- presumably one resource plus one
            # metadata resource per record (confirm against process_record).
            self.logger.info(
                "Finished adding %d initial resources with checkdate: %s"
                % ((no_records * 2), self.lastcheckdate))
        except URLError as e:
            self.logger.error("URLError: %s" % (e,))
        except NoRecordsException as e:
            self.logger.info("No new records found: %s" % e)
class Source(Observable):
    """A source contains a list of resources and changes over time.

    The repository is an in-memory dict keyed by basename (the resource
    URI). Each OAI-backed resource is accompanied by a metadata resource
    whose URI is a GetRecord request on the OAI endpoint; ``oaimapping``
    links OAI identifiers to the basename of the primary resource.
    """

    RESOURCE_PATH = "/resources"
    STATIC_FILE_PATH = os.path.join(os.path.dirname(__file__), "static")
    TEMP_FILE_PATH = os.path.join(os.path.dirname(__file__), "temp")

    def __init__(self, config, hostname, port):
        """Initialize the source.

        ``config`` is a mapping; ``config['fromdate']`` must offer
        ``strftime`` and seeds ``lastcheckdate``.
        """
        super(Source, self).__init__()
        self.logger = logging.getLogger('source')
        self.config = config
        self.logger.info("Source config: %s " % self.config)
        self.hostname = hostname
        self.port = port
        self.max_res_id = 1
        self._repository = {}  # {basename: {'timestamp': timestamp}}
        self.inventory_builder = None  # The inventory builder implementation
        self.changememory = None  # The change memory implementation
        self.no_events = 0
        self.oaimapping = {}  # oai: identifier -> basename
        self.client = None  # oai: set by bootstrap_oai
        # The date is re-parsed from a "Z"-suffixed string. Minutes must be
        # part of the format: with "%H:%S" the seconds would be parsed back
        # as minutes.
        self.lastcheckdate = dateutil_parser.parse(
            config['fromdate'].strftime("%Y-%m-%d %H:%M:%SZ"))  # oai

    ##### Source capabilities #####

    def add_inventory_builder(self, inventory_builder):
        """Adds an inventory builder implementation"""
        self.inventory_builder = inventory_builder

    @property
    def has_inventory_builder(self):
        """Returns True if the Source has an inventory builder"""
        return self.inventory_builder is not None

    def add_changememory(self, changememory):
        """Adds a changememory implementation"""
        self.changememory = changememory

    @property
    def has_changememory(self):
        """Returns True if a source maintains a change memory"""
        return self.changememory is not None

    ##### Bootstrap Source ######

    def bootstrap(self):
        """Bootstrap the source with a set of resources"""
        self.logger.info("Bootstrapping source")
        if self.has_changememory:
            self.changememory.bootstrap()
        if self.has_inventory_builder:
            self.inventory_builder.bootstrap()
        self._log_stats()

    ##### Source data accessors #####

    @property
    def base_uri(self):
        """Returns the base URI of the source (e.g., http://localhost:8888)"""
        return "http://" + self.hostname + ":" + str(self.port)

    @property
    def resource_count(self):
        """The number of resources in the source's repository"""
        return len(self._repository)

    @property
    def resources(self):
        """Iterates over resources and yields resource objects"""
        # Work on a copy of the keys: the repository may be modified while
        # the generator is suspended.
        for basename in list(self._repository):
            resource = self.resource(basename)
            if resource is None:
                self.logger.error("Cannot create resource %s " % basename +
                                  "because source object has been deleted.")
            else:
                yield resource

    @property
    def random_resource(self):
        """Returns a single random resource, or None if there is none"""
        rand_res = self.random_resources()
        if len(rand_res) == 1:
            return rand_res[0]
        return None

    def resource(self, basename):
        """Creates and returns a resource object from the internal resource
        repository. Repository values are copied into the object.

        Returns None when ``basename`` is not in the repository.
        """
        if basename not in self._repository:
            return None
        timestamp = self._repository[basename]['timestamp']
        return Resource(uri=basename, timestamp=timestamp)

    def random_resources(self, number=1):
        """Return a random set of resources, at most all resources"""
        number = min(number, len(self._repository))
        # A list is required here; random.sample rejects dict key views.
        rand_basenames = random.sample(list(self._repository), number)
        return [self.resource(basename) for basename in rand_basenames]

    # Private Methods

    def _metadata_uri(self, identifier):
        """Return the GetRecord URL of the metadata resource of identifier."""
        return (self.client.endpoint +
                "?verb=GetRecord&metadataPrefix=oai_dc&identifier=" +
                identifier)

    def _create_resource(self, basename=None, identifier=None,
                         timestamp=None, notify_observers=True, oai=True):
        """Create a new resource, add it to the source, notify observers.

        ``timestamp`` defaults to the current time at call time. With
        ``oai`` set, the matching metadata resource is created as well and
        ``identifier`` is mapped to ``basename`` in ``oaimapping``.
        """
        if timestamp is None:
            # Evaluate "now" on each call; a ``time.time()`` default in the
            # signature is computed only once, when the class is defined.
            timestamp = time.time()
        self._repository[basename] = {'timestamp': timestamp}
        change = ResourceChange(resource=self.resource(basename),
                                changetype="CREATED")
        if notify_observers:
            self.notify_observers(change)
            self.logger.debug("Event: %s" % repr(change))
        # add metadata resource url
        if oai:
            self._create_resource(basename=self._metadata_uri(identifier),
                                  timestamp=timestamp,
                                  notify_observers=notify_observers,
                                  oai=False)
            self.oaimapping[identifier] = basename

    def _update_resource(self, basename, identifier, timestamp, oai=True):
        """Update a resource, notify observers.

        With ``oai`` set, the metadata resource of ``identifier`` is
        updated to the same timestamp as well.
        """
        self._repository[basename] = {'timestamp': timestamp}
        change = ResourceChange(resource=self.resource(basename),
                                changetype="UPDATED")
        self.notify_observers(change)
        self.logger.debug("Event: %s" % repr(change))
        # update metadata resource url
        if oai:
            self._update_resource(self._metadata_uri(identifier),
                                  identifier, timestamp, oai=False)

    def _delete_resource(self, identifier, timestamp, notify_observers=True,
                         oai=True):
        """Delete a given resource, notify observers.

        With ``oai`` set, the primary resource mapped to ``identifier`` is
        removed after its metadata resource; otherwise only the metadata
        resource is removed. Raises KeyError when the identifier or the
        resource is unknown.
        """
        if oai:
            basename = self.oaimapping.pop(identifier)
            # delete metadata resource url
            self._delete_resource(identifier, timestamp,
                                  notify_observers=notify_observers,
                                  oai=False)
        else:
            basename = self._metadata_uri(identifier)
        res = self.resource(basename)
        del self._repository[basename]
        # Stamp the outgoing resource with the deletion time for the event.
        res.timestamp = timestamp
        if notify_observers:
            change = ResourceChange(resource=res, changetype="DELETED")
            self.notify_observers(change)
            self.logger.debug("Event: %s" % repr(change))

    def bootstrap_oai(self, endpoint):
        """Bootstrap the source from an OAI-PMH endpoint.

        Creates the OAI client for ``endpoint``, lists all records starting
        at ``config['fromdate']`` and processes each with ``init=True``.
        ``lastcheckdate`` follows the response date of the latest record.
        ``URLError`` and ``NoRecordsException`` are logged, not raised.
        """
        # TODO: update granularity
        startdate = self.config['fromdate']
        self.logger.debug("Connecting to OAI-Endpoint %s" % endpoint)
        self.client = Client(endpoint, self.config['limit'],
                             self.config['checkurl'])
        try:
            no_records = 0
            records = self.client.listRecords(
                startdate, delay=self.config['delay_time'])
            for record in records:
                no_records += self.process_record(record, init=True)
                self.lastcheckdate = record.responseDate()
            # The reported figure is twice the processed count -- presumably
            # a resource and a metadata resource per record (confirm against
            # process_record).
            self.logger.info(
                "Finished adding %d initial resources with checkdate: %s"
                % ((no_records * 2), self.lastcheckdate))
        except URLError as e:
            self.logger.error("URLError: %s" % (e,))
        except NoRecordsException as e:
            self.logger.info("No new records found: %s" % e)