コード例 #1
0
    def parse_trafficnsw_twitter_entry(self):
        ''' (TwitterTimeline) -> list

        Parse the TrafficNSW timeline into a list of event dictionaries.

        Each timeline entry is read through its 'created_at' and 'text'
        keys.  A typical text looks like

            Sydney Traffic EMERGENCY ROAD WORKS - TURRAMURRA Pacific Hwy
            at Kissing Point Rd #sydtraffic #trafficnetwork

        The part before ' - ' is the event type and the part after it is the
        event location; the leading "Sydney Traffic " and the trailing
        hashtags are dropped.

        RETURN :    List of dictionaries, one per timeline entry, with keys
                    - 'time': 'created_at' parsed and shifted by +10 hours
                    - 'type': event type text
                    - 'location_text': location text that is sent to GMap
                    - every key returned by ask_gmap_for_timeline()
                    - 'suburb', 'postcode', 'coordinate', 'location': set
                      by the fallback below when the GMap answer carries
                      no postcode containing a digit
        RAISES :    AssertionError if self.id is not 'TrafficNSW'.
        '''
        assert (self.id == 'TrafficNSW')

        headline_prefix = 'Sydney Traffic '
        processed_data = []

        for entry in self.get_timelines():
            # 'created_at' looks like "Tue Apr 09 01:50:04 +0000 2013".
            # The "+0000 " is removed instead of being parsed with '%z',
            # which the original author reported as buggy in strptime.
            created_at = entry['created_at'].replace("+0000 ", "")
            utc_time = datetime.datetime.strptime(created_at,
                                                  '%a %b %d %H:%M:%S %Y')
            # NOTE(review): fixed +10 hour offset to get Sydney time; it
            # does not follow daylight saving changes -- confirm this is
            # acceptable.
            event = {'time': utc_time + datetime.timedelta(hours=10)}

            # Split once on the full ' - ' separator so that a hyphen inside
            # the event type (e.g. "MULTI-VEHICLE") cannot shift the start
            # of the location, and a missing separator cannot produce a
            # negative slice index.
            headline, _, tail = entry['text'].partition(' - ')
            if headline.startswith(headline_prefix):
                headline = headline[len(headline_prefix):]
            event['type'] = headline

            # Drop everything from the first hashtag on; when there is no
            # hashtag the whole tail is kept.
            # NOTE(review): encode() yields a byte string; the string
            # operations that follow presumably rely on Python 2 string
            # semantics -- confirm before porting.
            event_location = tail.partition(' #')[0].encode("utf-8")

            # Because GMap can't return "a st" near "b st", replace near by at
            event_location = event_location.replace(' near ', ' at ')
            event['location_text'] = event_location

            gp = GmapQuery()
            gmap_answer = gp.ask_gmap_for_timeline(event_location + ", nsw")
            if gmap_answer:
                event.update(gmap_answer)

            # A postcode that contains at least one digit is accepted as is.
            gmap_postcode = event.get('postcode')
            if gmap_postcode is not None \
                    and any(ch.isdigit() for ch in gmap_postcode):
                processed_data.append(event)
                continue

            # Fallback when GMap can't find the right place: the suburb is
            # the leading run of all-uppercase, digit-free words.
            words = event_location.split()
            suburb_words = []
            for word in words:
                if word != word.upper() or any(ch.isdigit() for ch in word):
                    break
                suburb_words.append(word)
            suburb = ' '.join(suburb_words)
            event['suburb'] = suburb

            # The ", NSW" suffix belongs to the query only; it must not
            # leak into the suburb used to derive 'location' below.
            lookup = AuspostAPI().search_postcode(suburb + ", NSW")
            if isinstance(lookup, list):
                # Use the first match; an empty list means "not found".
                lookup = lookup[0] if lookup else None
            if lookup is None:
                event['postcode'] = None
            elif isinstance(lookup, dict):
                event['postcode'] = lookup['postcode']
                event['coordinate'] = (lookup['latitude'],
                                       lookup['longitude'])

            # Whatever follows the suburb words is the street-level location.
            event['location'] = ' '.join(words[len(suburb_words):])

            processed_data.append(event)

        return processed_data
コード例 #2
0
    def parse_trafficnsw_twitter_entry(self):
        ''' (TwitterTimeline) -> list

        Parse the TrafficNSW timeline into a list of event dictionaries.

        Each timeline entry is read through its 'created_at' and 'text'
        keys.  A typical text looks like

            Sydney Traffic EMERGENCY ROAD WORKS - TURRAMURRA Pacific Hwy
            at Kissing Point Rd #sydtraffic #trafficnetwork

        The part before ' - ' is the event type and the part after it is the
        event location; the leading "Sydney Traffic " and the trailing
        hashtags are dropped.

        RETURN :    List of dictionaries, one per timeline entry, with keys
                    - 'time': 'created_at' parsed and shifted by +10 hours
                    - 'type': event type text
                    - 'location_text': location text that is sent to GMap
                    - every key returned by ask_gmap_for_timeline()
        RAISES :    AssertionError if self.id is not 'TrafficNSW'.
        '''
        assert (self.id == 'TrafficNSW')

        headline_prefix = 'Sydney Traffic '
        processed_data = []

        for entry in self.get_timelines():
            # 'created_at' looks like "Tue Apr 09 01:50:04 +0000 2013".
            # The "+0000 " is removed instead of being parsed with '%z',
            # which the original author reported as buggy in strptime.
            created_at = entry['created_at'].replace("+0000 ", "")
            utc_time = datetime.datetime.strptime(created_at,
                                                  '%a %b %d %H:%M:%S %Y')
            # NOTE(review): fixed +10 hour offset to get Sydney time; it
            # does not follow daylight saving changes -- confirm this is
            # acceptable.
            event = {'time': utc_time + datetime.timedelta(hours=10)}

            # Split once on the full ' - ' separator so that a hyphen inside
            # the event type (e.g. "MULTI-VEHICLE") cannot shift the start
            # of the location, and a missing separator cannot produce a
            # negative slice index.
            headline, _, tail = entry['text'].partition(' - ')
            if headline.startswith(headline_prefix):
                headline = headline[len(headline_prefix):]
            # The event type used to be computed and then discarded; it is
            # now stored on the event.
            event['type'] = headline

            # Drop everything from the first hashtag on; when there is no
            # hashtag the whole tail is kept.
            # NOTE(review): encode() yields a byte string; the string
            # operations that follow presumably rely on Python 2 string
            # semantics -- confirm before porting.
            event_location = tail.partition(' #')[0].encode("utf-8")

            # Because GMap can't return "a st" near "b st", replace near by at
            event_location = event_location.replace(' near ', ' at ')
            event['location_text'] = event_location

            gp = GmapQuery()
            gmap_answer = gp.ask_gmap_for_timeline(event_location + ", nsw")
            if gmap_answer:
                event.update(gmap_answer)

            processed_data.append(event)

        return processed_data