Beispiel #1
0
    def __init__(self, nodecache):
        """
        Wire up the shared containers, the per-feature entities and both parsers.

        @parameter nodecache - cache object kept on the instance and handed to the
                               coordinate and way entities.
        """
        self.nodecache = nodecache
        shared = Containers()
        self.constants = shared

        # Feature entities: coords and ways receive the cache reference,
        # nodes, ways and relations receive the shared containers.
        self.coords_entity = CoordEntity(nodecache)
        self.nodes_entity = NodeEntity(shared)
        self.ways_entity = WayEntity(nodecache, shared)
        self.relations_entity = RelationEntity(shared)

        # Parser that only reports nodes
        node_callbacks = dict(nodes_callback=self.nodes_entity.analyze)
        self.nodeParser = OSMParser(concurrency=4, **node_callbacks)

        # Parser for coordinates, ways and relations
        feature_callbacks = dict(
            coords_callback=self.coords_entity.analyze,
            ways_callback=self.ways_entity.analyze,
            relations_callback=self.relations_entity.analyze,
        )
        self.parser = OSMParser(concurrency=4, **feature_callbacks)
Beispiel #2
0
class RoutingAnalyzer(object):
    """
    Parse an OSM file and derive "data temperature" scores from it.

    The instance owns one entity object per feature type (coords, nodes, ways,
    relations) plus two parsers that feed them. run() parses a file and returns
    the tuple produced by data_temperatures().
    """

    def __init__(self, nodecache):
        """
        Set up the shared containers, the feature entities and the parsers.

        @parameter nodecache - cache object kept on the instance and handed to the
                               coordinate and way entities.
        """
        self.nodecache = nodecache
        self.constants = Containers()
        # Initialize feature containers, passing cache ref
        self.coords_entity = CoordEntity(self.nodecache)
        self.nodes_entity = NodeEntity(self.constants)
        self.ways_entity = WayEntity(self.nodecache, self.constants)
        self.relations_entity = RelationEntity(self.constants)

        # Nodes get their own parser; run() invokes it before the main parser
        self.nodeParser = OSMParser(concurrency=4, nodes_callback=self.nodes_entity.analyze)

        # Main parser for coordinates, ways and relations
        self.parser = OSMParser(concurrency=4, coords_callback=self.coords_entity.analyze,
                                ways_callback=self.ways_entity.analyze,
                                relations_callback=self.relations_entity.analyze)

    # Calculate percentiles (not depending on numpy / scipy)
    # http://stackoverflow.com/questions/2374640/how-do-i-calculate-percentiles-with-python-numpy
    def percentile(self, N, percent, key=lambda x: x):
        """
        Find the percentile of a list of values.

        @parameter N - is a list of values. Note N MUST BE already sorted.
        @parameter percent - a float value from 0.0 to 1.0.
        @parameter key - optional key function to compute value from each element of N.

        @return - the value at the requested percentile as a float, linearly
                  interpolated between the two neighbouring elements when the
                  rank is fractional; 0 when N is empty.
        """
        if not N:
            return 0
        # Fractional rank of the requested percentile inside N
        k = (len(N) - 1) * percent
        f = math.floor(k)
        c = math.ceil(k)
        if f == c:
            # The rank is a whole number: no interpolation needed
            return float(key(N[int(k)]))
        # Weigh both neighbours by their distance to the fractional rank
        d0 = key(N[int(f)]) * (c - k)
        d1 = key(N[int(c)]) * (k - f)
        return float(d0 + d1)

    # This function calculates the ROUTING dimension of data temperature
    # by calculating the attributes factor for each of the binned categories
    # of way features and weighing them according to the relative bin weight
    def routing_attributes_temperature(self, ways):
        """
        Compute the routing temperatures for the way categories.

        @parameter ways - way entity providing attribute_factor(category),
                          uncommon_highway_length and length.

        @return - list of seven values, each multiplied by BASIC_TEMP: the
                  highways, main, local, guidance, unclassified and uncommon
                  factors followed by their ROAD_CATEGORY_WEIGHTS-weighted sum.
        """
        highway_factor = ways.attribute_factor('highways')
        main_factor = ways.attribute_factor('main')
        local_factor = ways.attribute_factor('local')
        guidance_factor = ways.attribute_factor('guidance')
        unclassified_factor = ways.attribute_factor('unclassified')

        # Without any way length there is nothing to relate the uncommon
        # length to; use 0.0 instead of dividing by zero.
        total_length = ways.length
        if total_length:
            uncommon_factor = float(ways.uncommon_highway_length) / total_length
        else:
            uncommon_factor = 0.0

        factors = highway_factor * ROAD_CATEGORY_WEIGHTS['highways'] + \
                main_factor * ROAD_CATEGORY_WEIGHTS['main'] + \
                local_factor * ROAD_CATEGORY_WEIGHTS['local'] + \
                guidance_factor * ROAD_CATEGORY_WEIGHTS['guidance'] + \
                unclassified_factor * ROAD_CATEGORY_WEIGHTS['unclassified'] + \
                uncommon_factor * ROAD_CATEGORY_WEIGHTS['uncommon']

        array = (highway_factor, main_factor, local_factor, guidance_factor,
                 unclassified_factor, uncommon_factor, factors)
        # A real list (not a lazy map object) because callers index the result
        return [BASIC_TEMP * x for x in array]

    # This function calculates the RELATION dimension of data temperature
    def relation_temperature(self, relations, intersections):
        """
        Compute the relation temperature: turn restrictions per intersection.

        @parameter relations - relation entity providing num_turnrestrcitions.
        @parameter intersections - mapping; every entry whose value is greater
                                   than 1 is counted as an intersection.

        @return - (turn restrictions / intersections) * BASIC_TEMP, or 0.0 when
                  no entry qualifies as an intersection.
        """
        number_of_intersections = sum(1 for x in intersections.values() if x > 1)
        if not number_of_intersections:
            # No intersections: the ratio is undefined, contribute nothing
            return 0.0
        return (float(relations.num_turnrestrcitions) / number_of_intersections) * BASIC_TEMP

    def freshness_temperature(self, edit_ages, edit_counts, current_date):
        """
        Compute the freshness temperature from edit dates and per-user edit counts.

        @parameter edit_ages - sized iterable of edit dates comparable with current_date.
        @parameter edit_counts - mapping whose values are edit counts.
        @parameter current_date - reference date the age windows are measured from.

        @return - the sum of all weighted factors multiplied by BASIC_TEMP. An
                  empty edit_ages or edit_counts makes the corresponding
                  factors 0.0 instead of raising ZeroDivisionError.
        """
        ages = edit_ages
        len_array = len(ages)

        # don't reverse it, so when we ask for 95% edits it gets from the ascending order
        counts = sorted(edit_counts.values())

        def share(count, total):
            # Float ratio; an empty population yields 0.0
            if not total:
                return 0.0
            return float(count) / total

        def edits_within(days):
            # Number of edits made no earlier than `days` days before current_date
            cutoff = current_date - datetime.timedelta(days=days)
            return sum(1 for a in ages if cutoff <= a)

        # Freshness factors: share of edits made within 1 month, 3 months,
        # 6 months, 1 year and 2 years of current_date, each with its own weight.
        ages1_factor = share(edits_within(30), len_array) * AGE_WEIGHT1
        ages10_factor = share(edits_within(90), len_array) * AGE_WEIGHT10
        ages25_factor = share(edits_within(180), len_array) * AGE_WEIGHT25
        ages50_factor = share(edits_within(365), len_array) * AGE_WEIGHT50
        ages75_factor = share(edits_within(730), len_array) * AGE_WEIGHT75

        # Share of edits older than 4 years, weighted by OLD_WEIGHT (the
        # original comment says this weighs negatively; the value is defined
        # elsewhere). share() forces float division: the previous int / int
        # expression floored to 0 on Python 2 unless every edit was old.
        old_factor = share(len_array - edits_within(1460), len_array) * OLD_WEIGHT

        # Calculate 95 percentile of users, this is not part of freshness but
        # is used in the freshness temperature. The threshold is computed once
        # instead of once per element.
        threshold = self.percentile(counts, 0.95)
        at_or_below = sum(1 for c in counts if c <= threshold)
        user95_factor = share(at_or_below, len(counts)) * USER_WEIGHT95

        return (ages1_factor + ages10_factor + ages25_factor + user95_factor + ages50_factor + old_factor +
                ages75_factor) * BASIC_TEMP

    def data_temperatures(self):
        """
        Combine the individual temperatures into the final data temperature.

        @return - tuple of eleven values: relation temperature, the seven
                  routing temperatures (see routing_attributes_temperature),
                  freshness temperature, tiger temperature and the weighted
                  final temperature.
        """
        # Normalize the data temperature to between 0 and 40 and add a buffer of zero celsius
        # DATA_TEMP is an extra factor to improve the contribution from relations
        reltemp = self.relation_temperature(self.relations_entity, self.constants.INTERSECTIONS) * DATA_TEMP
        routingtemp = self.routing_attributes_temperature(self.ways_entity)
        freshnesstemp = self.freshness_temperature(self.constants.AGES, self.constants.USERS_EDITS,
                                                   datetime.datetime.today())
        tigertemp = self.ways_entity.tiger_factor() * BASIC_TEMP
        # routingtemp[6] is the weighted sum over all road categories
        finaltemp = (RELATION_WEIGHT * reltemp +
                     ROUTING_WEIGHT * routingtemp[6] +
                     FRESHNESS_WEIGHT * freshnesstemp +
                     TIGER_WEIGHT * tigertemp +
                     ZERO_DATA_TEMPERATURE)
        return (reltemp,) + tuple(routingtemp[:7]) + (freshnesstemp, tigertemp, finaltemp)

    # The main function that parses the xml file and
    # calls the data temp calculations
    def run(self, filename):
        """
        Parse the given file and compute its data temperatures.

        @parameter filename - path of the file to parse.

        @return - the tuple from data_temperatures(), or None when filename
                  does not exist.
        """
        # check if the filename exists
        if not os.path.exists(filename):
            return

        # Parse the input data: nodes first, then coords, ways and relations.
        # Timings can be done outside of the program using time(1).
        self.nodeParser.parse(filename)
        self.parser.parse(filename)

        # Calculate data temperature
        datatemps = self.data_temperatures()
        # Parenthesized single argument prints the same text on Python 2 and 3
        print('Data temperatures for %s are: %s' % (filename, datatemps))
        return datatemps