コード例 #1
0
ファイル: harvester.py プロジェクト: cgueret/EventSeer-to-RDF
 def _process_event(self, record):
     '''
     Synchronise one harvested event record with the triple store.

     ``record['Event']`` must hold an HTML anchor: its text becomes the
     event name and its ``href`` (minus the first and last character)
     becomes the event id.  ``record['Date']`` must be formatted as
     ``%d %b %Y``.

     The event is refreshed when the triple store has no version date for
     it, or when the record date is at least one whole day newer than the
     stored one.  Refreshing saves the topics that are not stored yet,
     reloads every person, saves the event itself and writes its call for
     papers to ``<data_directory>/<resource name>_cfp.txt``.

     Raises AttributeError when ``record['Event']`` contains no ``href``
     and ValueError when ``record['Date']`` does not match the format.
     '''
     # Get the event id, its name and its last modification date
     event_name = re.sub(r'<a [^>]*>([^<]*)</a>', r'\g<1>', record['Event'],
                         flags=re.IGNORECASE).replace('\n', '')
     event_id = re.search('href="([^"]*)"', record['Event']).group(1)[1:-1]
     event_last_update_date = datetime.strptime(record['Date'], "%d %b %Y")
     event = Event(event_id)

     # Get the last modification date from the end point
     server_version_date = self.triple_store.get_last_version_date(event)

     # Guard clause: nothing to do when the server copy is recent enough
     up_to_date = (
         server_version_date is not None
         and (event_last_update_date - server_version_date).days <= 0
     )
     if up_to_date:
         print('\t[OK] %s - %s' % (event_id, event_name))
         return

     # The event is not in the triple store or needs to be updated
     print('\t[UPD] %s - %s' % (event_id, event_name))

     # Add the topics not already existing
     for t in event.get_topics():
         topic = Topic(t)
         if self.triple_store.get_last_version_date(topic) is not None:
             continue
         try:
             print('\t\t[UPD-TOPIC] %s' % t)
             topic.load_data()
             self.triple_store.save_rdf_data(topic)
         except Exception as error:
             # Best effort: it's ok if we miss one, but say so instead of
             # hiding the failure. %r keeps the message printable whatever
             # the id contains.
             print('\t\t[ERR-TOPIC] %r (%s)' % (t, type(error).__name__))

     # Update the data about all the persons concerned
     for p in event.get_persons():
         try:
             print('\t\t[UPD-PERSON] %s' % p)
             person = Person(p)
             person.load_data()
             self.triple_store.save_rdf_data(person)
         except Exception as error:
             # Best effort, same policy as for the topics
             print('\t\t[ERR-PERSON] %r (%s)' % (p, type(error).__name__))

     # Save the RDF data of the event
     event.load_data()
     self.triple_store.save_rdf_data(event)

     # Save the CFP from the call; the context manager closes the file
     # even if fetching or writing the CFP text fails.
     cfp_path = (self.data_directory + '/' + event.get_resource_name()
                 + '_cfp.txt')
     with open(cfp_path, 'w') as cfp_file:
         cfp_file.write(event.get_cfp_data())
コード例 #2
0
ファイル: test.py プロジェクト: cgueret/EventSeer-to-RDF
'''
Created on 30 Jun 2011

@author: cgueret
'''
import re
from objects.persons import Person
from objects.events import Event

if __name__ == '__main__':
    # Manual smoke test: split a few date strings with the date regex, then
    # print two persons and the RDF data of one event.

    # Raw string so that \d reaches the regex engine untouched; compiled
    # once because it is reused for every sample.
    date_pattern = re.compile(
        r'(?P<begin>[^-,]*)(-(?P<end>[^,]*))?, (?P<year>\d{4})')

    # A tuple (not a set) so the samples are printed in a stable order
    dates = (
        'August 24-26, 2011',
        'October 31-November 3, 2011',
        'September 5, 2011',
    )
    for d in dates:
        parts = date_pattern.search(d).groupdict()
        print(parts)
    print(Person('p/alistair'))
    print(Person('p/cui_tao'))
    #print(Person('p/alistair_<a href'))

    event = Event('e/16470')
    event.load_data()
    print(event.get_rdf_data())
コード例 #3
0
ファイル: client.py プロジェクト: cgueret/EventSeer-to-RDF
    def _process_record(self, record, type):
        '''
        Process a specific record.

        ``type`` is either ``'Topic'`` or ``'Event'`` and is also the key
        of ``record`` holding the HTML anchor of the entity (anchor text is
        the name, ``href`` the URL).  ``record['Date']`` must be formatted
        as ``%d %b %Y``.

        The RDF file ``data/<entity id>.rdf`` is (re)generated when it does
        not exist, holds no ``DCT['modified']`` date for the entity's named
        graph, or when the record date is at least one whole day newer than
        that date.  For events the call for papers is written next to it as
        ``data/<entity id>_cfp.txt``.

        Raises ValueError for any other ``type``, and AttributeError when
        the anchor has no ``href``.
        '''
        # Compose a name and an ID for the record
        entity_name = re.sub(r'<a [^>]*>([^<]*)</a>', r'\g<1>',
                             record[type]).replace('\n', '')
        entity_url = re.search('href="([^"]*)"', record[type]).group(1)
        entity_id = entity_url.replace('/', '')
        # Only the first occurrence is expanded: replacing every 't' or 'e'
        # would also mangle the rest of the identifier.
        if type == 'Topic':
            entity_id = entity_id.replace('t', 'topic_', 1)
            entity = Topic(entity_name, entity_id)
        elif type == 'Event':
            entity_id = entity_id.replace('e', 'event_', 1)
            entity = Event(entity_name, entity_id)
        else:
            # Fail here rather than with a NameError on 'entity' below
            raise ValueError('Unsupported record type: %r' % (type,))
        rdf_file = '%s.rdf' % entity_id
        rdf_path = 'data/' + rdf_file
        named_graph_resource = entity.named_graph()

        # Open the stored file, if any, and get the last modification date
        last_modification = None
        if os.path.isfile(rdf_path):
            g = Graph()
            g.parse(rdf_path, format='xml')
            # If several dates are stored, the last one iterated is kept
            for _, _, date in g.triples(
                (named_graph_resource, DCT['modified'], None)):
                last_modification = isodate.parse_datetime(date)

        if last_modification is None:
            # If not existent, generate
            print('\t[GEN] %s - %s' % (entity_id, entity_name))
        else:
            # NOTE(review): assumes the stored date parses to a naive
            # datetime, otherwise this subtraction fails - confirm.
            delta = datetime.strptime(record['Date'],
                                      "%d %b %Y") - last_modification
            if delta.days <= 0:
                # Stored copy is recent enough, nothing to write
                print('\t[OK] %s - %s' % (entity_id, entity_name))
                return
            # If updated, update
            print('\t[UPD] %s - %s' % (entity_id, entity_name))

        # Process a topic (no parsing yet, the raw record is only shown)
        if type == 'Topic':
            print(record)

        # Process an event
        if type == 'Event':
            entity.parse(record, entity_url)
            # Save the CFP; the file is closed even if writing fails
            with open('data/' + entity_id + '_cfp.txt', 'w') as f:
                f.write(entity.get_cfp_data())

        # Save the RDF data
        with open(rdf_path, 'w') as f:
            f.write(entity.get_rdf_data())
コード例 #4
0
ファイル: test.py プロジェクト: cgueret/EventSeer-to-RDF
'''
Created on 30 Jun 2011

@author: cgueret
'''
import re
from objects.persons import Person
from objects.events import Event

if __name__ == '__main__':
    # Manual smoke test: split a few date strings with the date regex, then
    # print two persons and the RDF data of one event.

    # Raw string so that \d reaches the regex engine untouched; compiled
    # once because it is reused for every sample.
    date_pattern = re.compile(
        r'(?P<begin>[^-,]*)(-(?P<end>[^,]*))?, (?P<year>\d{4})')

    # A tuple (not a set) so the samples are printed in a stable order
    dates = (
        'August 24-26, 2011',
        'October 31-November 3, 2011',
        'September 5, 2011',
    )
    for d in dates:
        parts = date_pattern.search(d).groupdict()
        print(parts)
    print(Person('p/alistair'))
    print(Person('p/cui_tao'))
    #print(Person('p/alistair_<a href'))

    event = Event('e/16470')
    event.load_data()
    print(event.get_rdf_data())