Example #1
0
 def bootstrap_oai(self, endpoint):  # TODO: update granularity
     """Bootstrap the source from an OAI-PMH endpoint.

     Creates a ``Client`` for ``endpoint`` (stored on ``self.client``),
     iterates over everything ``listRecords`` yields for
     ``config['fromdate']`` and hands each record to ``process_record``
     with ``init=True``. After every record ``self.lastcheckdate`` is set
     to that record's ``responseDate()``.

     A ``URLError`` raised while talking to the endpoint is logged and
     swallowed; records processed before the failure stay in place.

     Args:
         endpoint: Endpoint address passed straight to ``Client``.
     """
     startdate = self.config['fromdate']
     self.logger.debug("Connecting to OAI-Endpoint %s" % endpoint)
     self.client = Client(endpoint, self.config['limit'],
                          self.config['checkurl'])
     no_records = 0
     try:
         for record in self.client.listRecords(
                 startdate, delay=self.config['delay_time']):
             no_records += self.process_record(record, init=True)
             self.lastcheckdate = record.responseDate()
         # The count is doubled in the message -- presumably because every
         # record yields a resource plus its metadata resource.
         # NOTE(review): confirm against process_record.
         self.logger.info(
             "Finished adding  %d initial resources with checkdate: %s" %
             ((no_records * 2), self.lastcheckdate))
     except URLError as e:
         # "as" form is valid on Python 2.6+ and Python 3 alike.
         self.logger.error("URLError: %s" % e)
Example #2
0
 def bootstrap_oai(self, endpoint):  # TODO: update granularity
     """Bootstrap the source from an OAI-PMH endpoint.

     Builds a ``Client`` for ``endpoint`` and keeps it on ``self.client``,
     then feeds every record returned by ``listRecords`` (called with
     ``config['fromdate']`` and ``config['delay_time']``) into
     ``process_record`` with ``init=True``. ``self.lastcheckdate`` is
     updated to each record's ``responseDate()`` as it is processed.

     A ``URLError`` is logged at error level and not re-raised.

     Args:
         endpoint: Endpoint address passed straight to ``Client``.
     """
     startdate = self.config['fromdate']
     self.logger.debug("Connecting to OAI-Endpoint %s" % endpoint)
     self.client = Client(endpoint,
                          self.config['limit'],
                          self.config['checkurl'])
     no_records = 0
     try:
         records = self.client.listRecords(
             startdate, delay=self.config['delay_time'])
         for record in records:
             no_records += self.process_record(record, init=True)
             self.lastcheckdate = record.responseDate()
         # Count is reported doubled -- presumably one resource plus one
         # metadata resource per record. NOTE(review): confirm.
         self.logger.info(
             "Finished adding  %d initial resources with checkdate: %s" %
             ((no_records * 2), self.lastcheckdate))
     except URLError as e:
         # "except X as e" parses on both Python 2.6+ and Python 3.
         self.logger.error("URLError: %s" % e)
Example #3
0
class Source(Observable):
    """A source contains a list of resources and changes over time.

    Resources live in an in-memory repository keyed by basename, which is
    also used as the resource URI. When a resource is created, updated or
    deleted in OAI mode (``oai=True``) a second "metadata" resource is
    handled alongside it; its URI is the endpoint's ``GetRecord`` URL for
    the record identifier. ``oaimapping`` maps identifiers to basenames.
    """

    RESOURCE_PATH = "/resources"
    STATIC_FILE_PATH = os.path.join(os.path.dirname(__file__), "static")
    TEMP_FILE_PATH = os.path.join(os.path.dirname(__file__), "temp")

    def __init__(self, config, hostname, port):
        """Initialize the source.

        Args:
            config: Mapping of settings. This class reads the keys
                'fromdate', 'limit', 'checkurl' and 'delay_time'.
            hostname: Host name used to build ``base_uri``.
            port: Port used to build ``base_uri``.
        """
        super(Source, self).__init__()
        self.logger = logging.getLogger('source')
        self.config = config
        self.logger.info("Source config: %s " % self.config)
        self.hostname = hostname
        self.port = port
        self.max_res_id = 1
        self._repository = {}  # {basename: {'timestamp': timestamp}}
        self.inventory_builder = None  # The inventory builder implementation
        self.changememory = None  # The change memory implementation
        self.no_events = 0
        self.oaimapping = {}  # oai: record identifier -> basename
        self.client = None  # oai: set by bootstrap_oai()
        # oai: round-trip 'fromdate' through a string with a trailing "Z"
        # (presumably to obtain a UTC-aware datetime -- confirm). The
        # format includes minutes; "%H:%S" alone would make the parser read
        # the seconds as minutes.
        self.lastcheckdate = dateutil_parser.parse(
            config['fromdate'].strftime("%Y-%m-%d %H:%M:%SZ"))

    ##### Source capabilities #####

    def add_inventory_builder(self, inventory_builder):
        """Adds an inventory builder implementation"""
        self.inventory_builder = inventory_builder

    @property
    def has_inventory_builder(self):
        """Returns True if the Source has an inventory builder"""
        return self.inventory_builder is not None

    def add_changememory(self, changememory):
        """Adds a changememory implementation"""
        self.changememory = changememory

    @property
    def has_changememory(self):
        """Returns True if a source maintains a change memory"""
        return self.changememory is not None

    ##### Bootstrap Source ######

    def bootstrap(self):
        """Bootstrap the source with a set of resources.

        Bootstraps the change memory and the inventory builder (each only
        if configured) and then logs statistics.
        """
        self.logger.info("Bootstrapping source")
        if self.has_changememory:
            self.changememory.bootstrap()
        if self.has_inventory_builder:
            self.inventory_builder.bootstrap()
        # NOTE(review): _log_stats is not defined in the visible part of
        # this class; it is expected to be provided elsewhere.
        self._log_stats()

    ##### Source data accessors #####

    @property
    def base_uri(self):
        """Returns the base URI of the source (e.g., http://localhost:8888)"""
        return "http://" + self.hostname + ":" + str(self.port)

    @property
    def resource_count(self):
        """The number of resources in the source's repository"""
        return len(self._repository)

    @property
    def resources(self):
        """Iterates over resources and yields resource objects.

        Entries that disappear from the repository while the generator is
        being consumed are logged and skipped.
        """
        # Iterate over a snapshot of the keys so the repository may change
        # between yields without invalidating the iteration.
        for basename in list(self._repository):
            resource = self.resource(basename)
            if resource is None:
                self.logger.error("Cannot create resource %s " % basename +
                                  "because source object has been deleted.")
            else:
                yield resource

    @property
    def random_resource(self):
        """Returns a single random resource, or None if there is none"""
        rand_res = self.random_resources()
        return rand_res[0] if len(rand_res) == 1 else None

    def resource(self, basename):
        """Creates and returns a resource object from the internal resource
        repository. Repository values are copied into the object.

        Returns None when ``basename`` is not in the repository.
        """
        if basename not in self._repository:
            return None
        timestamp = self._repository[basename]['timestamp']
        return Resource(uri=basename, timestamp=timestamp)

    def random_resources(self, number=1):
        """Return a random set of resources, at most all resources"""
        number = min(number, len(self._repository))
        # random.sample needs a sequence, so materialise the keys first.
        rand_basenames = random.sample(list(self._repository), number)
        return [self.resource(basename) for basename in rand_basenames]

    # Private Methods

    def _metadata_uri(self, identifier):
        """Return the GetRecord URL used as URI of a metadata resource.

        Requires ``self.client`` to be set (see ``bootstrap_oai``).
        """
        return (self.client.endpoint +
                "?verb=GetRecord&metadataPrefix=oai_dc&identifier=" +
                identifier)

    def _create_resource(self,
                         basename=None,
                         identifier=None,
                         timestamp=None,
                         notify_observers=True,
                         oai=True):
        """Create a new resource, add it to the source, notify observers.

        Args:
            basename: Repository key / URI of the new resource.
            identifier: Record identifier; must be a string when ``oai``
                is true because it is concatenated into the metadata URL.
            timestamp: Timestamp to store; defaults to the current time at
                call time.
            notify_observers: Whether to emit a "CREATED" change.
            oai: When true, also create the metadata resource and record
                ``identifier -> basename`` in ``oaimapping``.
        """
        if timestamp is None:
            # Resolve the default per call; a time.time() default in the
            # signature would be frozen at definition time.
            timestamp = time.time()
        self._repository[basename] = {'timestamp': timestamp}
        change = ResourceChange(resource=self.resource(basename),
                                changetype="CREATED")
        if notify_observers:
            self.notify_observers(change)
            self.logger.debug("Event: %s" % repr(change))
        # add metadata resource url
        if oai:
            self._create_resource(basename=self._metadata_uri(identifier),
                                  timestamp=timestamp,
                                  notify_observers=notify_observers,
                                  oai=False)
            self.oaimapping[identifier] = basename

    def _update_resource(self, basename, identifier, timestamp, oai=True):
        """Update a resource, notify observers.

        With ``oai`` true the metadata resource for ``identifier`` is
        updated (and announced) as well, after the resource itself.
        """
        self._repository[basename] = {'timestamp': timestamp}
        change = ResourceChange(resource=self.resource(basename),
                                changetype="UPDATED")
        self.notify_observers(change)
        self.logger.debug("Event: %s" % repr(change))
        # update metadata resource url
        if oai:
            self._update_resource(self._metadata_uri(identifier),
                                  identifier,
                                  timestamp,
                                  oai=False)

    def _delete_resource(self,
                         identifier,
                         timestamp,
                         notify_observers=True,
                         oai=True):
        """Delete a given resource, notify observers.

        With ``oai`` true the basename is looked up in (and removed from)
        ``oaimapping`` and the metadata resource is deleted first; with
        ``oai`` false only the metadata resource for ``identifier`` is
        deleted.

        Raises:
            KeyError: if ``identifier`` is not in ``oaimapping`` (oai mode)
                or the resolved basename is not in the repository.
        """
        if oai:
            basename = self.oaimapping.pop(identifier)
            # delete metadata resource url
            self._delete_resource(identifier,
                                  timestamp,
                                  notify_observers=notify_observers,
                                  oai=False)
        else:
            basename = self._metadata_uri(identifier)

        res = self.resource(basename)
        del self._repository[basename]
        # The emitted resource carries the deletion time, not the stored one.
        res.timestamp = timestamp

        if notify_observers:
            change = ResourceChange(resource=res, changetype="DELETED")
            self.notify_observers(change)
            self.logger.debug("Event: %s" % repr(change))

    def bootstrap_oai(self, endpoint):  # TODO: update granularity
        """Bootstrap the source from an OAI-PMH endpoint.

        Creates the ``Client`` for ``endpoint``, feeds every record from
        ``listRecords`` to ``process_record`` with ``init=True`` and keeps
        ``lastcheckdate`` at the latest record's ``responseDate()``.
        ``URLError`` and ``NoRecordsException`` are logged, not re-raised.
        """
        startdate = self.config['fromdate']
        self.logger.debug("Connecting to OAI-Endpoint %s" % endpoint)
        self.client = Client(endpoint, self.config['limit'],
                             self.config['checkurl'])
        no_records = 0
        try:
            # NOTE(review): process_record is not defined in the visible
            # part of this class; its return value is summed as a count.
            for record in self.client.listRecords(
                    startdate, delay=self.config['delay_time']):
                no_records += self.process_record(record, init=True)
                self.lastcheckdate = record.responseDate()
            # Doubled because _create_resource adds a metadata resource
            # next to each resource in OAI mode (assuming process_record
            # counts records -- confirm).
            self.logger.info(
                "Finished adding  %d initial resources with checkdate: %s" %
                ((no_records * 2), self.lastcheckdate))
        except URLError as e:
            self.logger.error("URLError: %s" % e)
        except NoRecordsException as e:
            self.logger.info("No new records found: %s" % e)
Example #4
0
class Source(Observable):
    """A source contains a list of resources and changes over time.

    The repository is an in-memory dict keyed by basename; the basename is
    used verbatim as the resource URI. In OAI mode (``oai=True``) each
    create/update/delete also touches a companion "metadata" resource
    whose URI is the endpoint's ``GetRecord`` URL for the identifier, and
    ``oaimapping`` tracks which basename belongs to which identifier.
    """

    RESOURCE_PATH = "/resources"
    STATIC_FILE_PATH = os.path.join(os.path.dirname(__file__), "static")
    TEMP_FILE_PATH = os.path.join(os.path.dirname(__file__), "temp")

    def __init__(self, config, hostname, port):
        """Initialize the source.

        Args:
            config: Mapping of settings. Keys read by this class:
                'fromdate', 'limit', 'checkurl', 'delay_time'.
            hostname: Host name used to build ``base_uri``.
            port: Port used to build ``base_uri``.
        """
        super(Source, self).__init__()
        self.logger = logging.getLogger('source')
        self.config = config
        self.logger.info("Source config: %s " % self.config)
        self.hostname = hostname
        self.port = port
        self.max_res_id = 1
        self._repository = {}  # {basename: {'timestamp': timestamp}}
        self.inventory_builder = None  # The inventory builder implementation
        self.changememory = None  # The change memory implementation
        self.no_events = 0
        self.oaimapping = {}  # oai: record identifier -> basename
        self.client = None  # oai: set by bootstrap_oai()
        # oai: 'fromdate' is formatted with a trailing "Z" and parsed back
        # (presumably to get a UTC-aware datetime -- confirm). Minutes are
        # part of the format; with "%H:%S" the seconds would be parsed as
        # minutes.
        fromdate_text = config['fromdate'].strftime("%Y-%m-%d %H:%M:%SZ")
        self.lastcheckdate = dateutil_parser.parse(fromdate_text)

    ##### Source capabilities #####

    def add_inventory_builder(self, inventory_builder):
        """Adds an inventory builder implementation"""
        self.inventory_builder = inventory_builder

    @property
    def has_inventory_builder(self):
        """Returns True if the Source has an inventory builder"""
        return self.inventory_builder is not None

    def add_changememory(self, changememory):
        """Adds a changememory implementation"""
        self.changememory = changememory

    @property
    def has_changememory(self):
        """Returns True if a source maintains a change memory"""
        return self.changememory is not None

    ##### Bootstrap Source ######

    def bootstrap(self):
        """Bootstrap the source with a set of resources.

        Runs ``bootstrap()`` on the change memory and on the inventory
        builder when they are configured, then logs statistics.
        """
        self.logger.info("Bootstrapping source")
        if self.has_changememory:
            self.changememory.bootstrap()
        if self.has_inventory_builder:
            self.inventory_builder.bootstrap()
        # NOTE(review): _log_stats is not defined in the visible part of
        # this class; it is expected to be provided elsewhere.
        self._log_stats()

    ##### Source data accessors #####

    @property
    def base_uri(self):
        """Returns the base URI of the source (e.g., http://localhost:8888)"""
        return "http://" + self.hostname + ":" + str(self.port)

    @property
    def resource_count(self):
        """The number of resources in the source's repository"""
        return len(self._repository)

    @property
    def resources(self):
        """Iterates over resources and yields resource objects.

        A basename that has been removed from the repository by the time
        it is reached is logged and skipped.
        """
        # Snapshot the keys: the repository may be modified while the
        # caller is still consuming this generator.
        for basename in list(self._repository):
            resource = self.resource(basename)
            if resource is None:
                self.logger.error("Cannot create resource %s " % basename +
                                  "because source object has been deleted.")
            else:
                yield resource

    @property
    def random_resource(self):
        """Returns a single random resource, or None if there is none"""
        rand_res = self.random_resources()
        if len(rand_res) == 1:
            return rand_res[0]
        return None

    def resource(self, basename):
        """Creates and returns a resource object from the internal resource
        repository. Repository values are copied into the object.

        Returns None when ``basename`` is not in the repository.
        """
        if basename not in self._repository:
            return None
        entry = self._repository[basename]
        return Resource(uri=basename, timestamp=entry['timestamp'])

    def random_resources(self, number=1):
        """Return a random set of resources, at most all resources"""
        if number > len(self._repository):
            number = len(self._repository)
        # random.sample requires a sequence; a dict key view is not one.
        rand_basenames = random.sample(list(self._repository), number)
        return [self.resource(basename) for basename in rand_basenames]

    # Private Methods

    def _metadata_uri(self, identifier):
        """Return the GetRecord URL used as URI of a metadata resource.

        Requires ``self.client`` to be set (see ``bootstrap_oai``).
        """
        return (self.client.endpoint +
                "?verb=GetRecord&metadataPrefix=oai_dc&identifier=" +
                identifier)

    def _create_resource(self, basename=None, identifier=None,
                         timestamp=None, notify_observers=True, oai=True):
        """Create a new resource, add it to the source, notify observers.

        Args:
            basename: Repository key / URI of the new resource.
            identifier: Record identifier; must be a string when ``oai``
                is true because it is concatenated into the metadata URL.
            timestamp: Timestamp to store; when omitted the current time
                at call time is used.
            notify_observers: Whether to emit a "CREATED" change.
            oai: When true, also create the metadata resource and record
                ``identifier -> basename`` in ``oaimapping``.
        """
        if timestamp is None:
            # A time.time() default in the signature is evaluated only
            # once, at definition time; resolve it per call instead.
            timestamp = time.time()
        self._repository[basename] = {'timestamp': timestamp}
        change = ResourceChange(resource=self.resource(basename),
                                changetype="CREATED")
        if notify_observers:
            self.notify_observers(change)
            self.logger.debug("Event: %s" % repr(change))
        # add metadata resource url
        if oai:
            self._create_resource(basename=self._metadata_uri(identifier),
                                  timestamp=timestamp,
                                  notify_observers=notify_observers,
                                  oai=False)
            self.oaimapping[identifier] = basename

    def _update_resource(self, basename, identifier, timestamp, oai=True):
        """Update a resource, notify observers.

        With ``oai`` true the metadata resource for ``identifier`` is
        updated (and announced) too, after the resource itself.
        """
        self._repository[basename] = {'timestamp': timestamp}
        change = ResourceChange(resource=self.resource(basename),
                                changetype="UPDATED")
        self.notify_observers(change)
        self.logger.debug("Event: %s" % repr(change))
        # update metadata resource url
        if oai:
            self._update_resource(self._metadata_uri(identifier),
                                  identifier, timestamp, oai=False)

    def _delete_resource(self, identifier, timestamp,
                         notify_observers=True, oai=True):
        """Delete a given resource, notify observers.

        With ``oai`` true the basename is taken (and removed) from
        ``oaimapping`` and the metadata resource is deleted first; with
        ``oai`` false only the metadata resource for ``identifier`` is
        deleted.

        Raises:
            KeyError: if ``identifier`` is not in ``oaimapping`` (oai mode)
                or the resolved basename is not in the repository.
        """
        if oai:
            basename = self.oaimapping.pop(identifier)
            # delete metadata resource url
            self._delete_resource(identifier, timestamp,
                                  notify_observers=notify_observers,
                                  oai=False)
        else:
            basename = self._metadata_uri(identifier)

        res = self.resource(basename)
        del self._repository[basename]
        # Report the deletion time on the emitted resource.
        res.timestamp = timestamp

        if notify_observers:
            change = ResourceChange(resource=res, changetype="DELETED")
            self.notify_observers(change)
            self.logger.debug("Event: %s" % repr(change))

    def bootstrap_oai(self, endpoint):  # TODO: update granularity
        """Bootstrap the source from an OAI-PMH endpoint.

        Creates the ``Client`` for ``endpoint``, passes every record from
        ``listRecords`` to ``process_record`` with ``init=True`` and sets
        ``lastcheckdate`` to each record's ``responseDate()``.
        ``URLError`` and ``NoRecordsException`` are logged, not re-raised.
        """
        startdate = self.config['fromdate']
        self.logger.debug("Connecting to OAI-Endpoint %s" % endpoint)
        self.client = Client(endpoint,
                             self.config['limit'],
                             self.config['checkurl'])
        no_records = 0
        try:
            # NOTE(review): process_record is not defined in the visible
            # part of this class; its return value is summed as a count.
            records = self.client.listRecords(
                startdate, delay=self.config['delay_time'])
            for record in records:
                no_records += self.process_record(record, init=True)
                self.lastcheckdate = record.responseDate()
            # Doubled because _create_resource adds a metadata resource
            # alongside each resource in OAI mode (assuming process_record
            # counts records -- confirm).
            self.logger.info(
                "Finished adding  %d initial resources with checkdate: %s" %
                ((no_records * 2), self.lastcheckdate))
        except URLError as e:
            self.logger.error("URLError: %s" % e)
        except NoRecordsException as e:
            self.logger.info("No new records found: %s" % e)