def plot_intervals_restricted_cv(output_folders):
    # Intervals are indexed by position; the actual interval boundaries can
    # currently only be seen in the printout from the preprocessor.
    # Run this on a CV that has been run on restricted intervals.
    intervals = range(len(output_folders))
    AVG_avg_errors = []
    AVG_avg_errors_baseline = []
    AVG_avg_errors_ext = []
    #AVG_avg_errors_heu = []
    #AVG_avg_errors_ind = []

    for index, folder in enumerate(output_folders):

        # save average of averages with the given interval
        avg_errors = pickle.load(open(folder + 'avg_errors.pickle', 'rb'))
        avg_errors_baseline = pickle.load(open(folder + 'avg_errors_baseline.pickle', 'rb'))
        avg_errors_ext = pickle.load(open(folder + 'avg_errors_ext.pickle', 'rb'))
        #avg_errors_heu = pickle.load(open(folder + 'avg_errors_heu.pickle', 'rb'))
        #avg_errors_ind = pickle.load(open(folder + 'avg_errors_ind.pickle', 'rb'))

        AVG_avg_errors.append(avg(avg_errors))
        AVG_avg_errors_baseline.append(avg(avg_errors_baseline))
        AVG_avg_errors_ext.append(avg(avg_errors_ext))
        #AVG_avg_errors_heu.append(avg(avg_errors_heu))
        #AVG_avg_errors_ind.append(avg(avg_errors_ind))

    # plot
    plot(intervals, AVG_avg_errors, color='blue')
    plot(intervals, AVG_avg_errors_ext, color='red')
    #plot(intervals, AVG_avg_errors_ind, color='green')
    #plot(intervals, AVG_avg_errors_heu, color='yellow')
    plot(intervals, AVG_avg_errors_baseline, color='purple')
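# Note: every snippet in this collection relies on an avg helper (and several
# on a stddev helper) from a project-specific utils module that is not shown.
# A minimal sketch assuming plain arithmetic-mean semantics (the real helpers
# may treat weights, empty inputs, or None differently):
def avg(values, weights=None):
    # Arithmetic (optionally weighted) mean; returns 0 for an empty sequence.
    values = list(values)
    if not values:
        return 0
    if weights is None:
        return sum(values) / float(len(values))
    return sum(v * w for v, w in zip(values, weights)) / float(sum(weights))


def stddev(values):
    # Population standard deviation around the arithmetic mean.
    values = list(values)
    if not values:
        return 0
    mean = avg(values)
    return (sum((v - mean) ** 2 for v in values) / float(len(values))) ** 0.5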
Example #2
    def check_frame_coverage(self,
                             partition='train',
                             debug_idxs=None,
                             skip_idxs=()):
        """Record the fraction of potential frames and FEs that are present
        in gold compressions.
        """
        tgt_instances = self.get_instances(partition=partition,
                                           debug_idxs=debug_idxs,
                                           skip_idxs=skip_idxs)

        print "items\tavg_overlap_rate\tavg_reachability\tnum_frameless"
        for item in ['frame', 'fe']:
            instance_overlaps = []
            instance_reachability = []
            num_frameless = 0

            for instance in tgt_instances:
                sent_frame_tuples = getattr(instance, 'get_' + item +
                                            '_tuples')(instance.sentences[0])
                gold_overlaps = []
                for gold_sent in instance.gold_sentences:
                    gold_frame_tuples = getattr(instance, 'get_' + item +
                                                '_tuples')(gold_sent)
                    if len(gold_frame_tuples) == 0:
                        gold_overlaps.append(1)  # always reachable
                        num_frameless += 1
                        break
                    gold_frame_tuple_set = set(gold_frame_tuples)
                    overlap = gold_frame_tuple_set.intersection(
                        sent_frame_tuples)
                    gold_overlaps.append(len(overlap) / \
                                            len(gold_frame_tuple_set))

                instance_overlaps.append(avg(gold_overlaps))
                instance_reachability.append(int(min(gold_overlaps) == 1))

            print item, '\t', avg(instance_overlaps),
            print '\t\t', avg(instance_reachability),
            print '\t\t', num_frameless

        # We also need to check which frames are present and how many FEs
        # they have
        num_fes = defaultdict(int)
        in_tgts = 0
        for instance in tgt_instances:
            for sentence in instance.gold_sentences:  # + instance.input_sents:
                for frame in sentence.frames.nodes:
                    key = sum(
                        int(hasattr(edge, 'fe'))
                        for edge in frame.outgoing_edges.itervalues())
                    num_fes[key] += 1
                    in_tgts += sum(
                        int(hasattr(edge, 'target') and hasattr(edge, 'fe'))
                        for edge in frame.outgoing_edges.itervalues())

        print "Histogram of FEs per frame:", dict(num_fes)
        print "Number of FEs which are also targets:", in_tgts
Example #3
    def check_frame_coverage(self, partition='train', debug_idxs=None,
            skip_idxs=()):
        """Record the fraction of potential frames and FEs that are present
        in gold compressions.
        """
        tgt_instances = self.get_instances(partition=partition,
                                           debug_idxs=debug_idxs,
                                           skip_idxs=skip_idxs)

        print "items\tavg_overlap_rate\tavg_reachability\tnum_frameless"
        for item in ['frame', 'fe']:
            instance_overlaps = []
            instance_reachability = []
            num_frameless = 0

            for instance in tgt_instances:
                sent_frame_tuples = getattr(instance,
                        'get_' + item + '_tuples')(instance.sentences[0])
                gold_overlaps = []
                for gold_sent in instance.gold_sentences:
                    gold_frame_tuples = getattr(instance,
                            'get_' + item + '_tuples')(gold_sent)
                    if len(gold_frame_tuples) == 0:
                        gold_overlaps.append(1)     # always reachable
                        num_frameless += 1
                        break
                    gold_frame_tuple_set = set(gold_frame_tuples)
                    overlap = gold_frame_tuple_set.intersection(
                                sent_frame_tuples)
                    gold_overlaps.append(len(overlap) / \
                                            len(gold_frame_tuple_set))

                instance_overlaps.append(avg(gold_overlaps))
                instance_reachability.append(int(min(gold_overlaps) == 1))

            print item, '\t', avg(instance_overlaps),
            print '\t\t', avg(instance_reachability),
            print '\t\t', num_frameless

        # We also need to check which frames are present and how many FEs
        # they have
        num_fes = defaultdict(int)
        in_tgts = 0
        for instance in tgt_instances:
            for sentence in instance.gold_sentences: # + instance.input_sents:
                for frame in sentence.frames.nodes:
                    key = sum(int(hasattr(edge, 'fe'))
                              for edge in frame.outgoing_edges.itervalues())
                    num_fes[key] += 1
                    in_tgts += sum(int(hasattr(edge, 'target') and
                                       hasattr(edge, 'fe'))
                              for edge in frame.outgoing_edges.itervalues())

        print "Histogram of FEs per frame:", dict(num_fes)
        print "Number of FEs which are also targets:", in_tgts
Example #4
 def test_predict(self):
     dimension = 10
     matrix = [[randint(1, 10) for _i in range(0, dimension)]
               for _c in range(0, dimension)]
     cf = MFExplicitPrepSGD(matrix, lf=4)
     for user_id in range(dimension):
         with self.subTest(i=user_id):
             avg_user = avg(matrix[user_id])
             for item_id in range(len(matrix[user_id])):
                 avg_item = avg(cf.matrix.col(item_id))
                 with self.subTest(i=item_id):
                     prep = cf.predict_prep(user_id, item_id)
                     real = prep + 0.5 * (avg_user + avg_item)
                     self.assertEqual(cf.predict(user_id, item_id), real)
Example #5
    def squeeze_shallow_cache_to_avg(self):
        with self.lock:
            if len(self.shallow_cache) > 0:
                out_temp_avg = utils.avg(
                    [x.outdoor_data[0] for x in self.shallow_cache])
                out_humi_avg = utils.avg(
                    [x.outdoor_data[1] for x in self.shallow_cache])
                out_pm10_avg = utils.avg(
                    [x.outdoor_data[2] for x in self.shallow_cache])
                out_pm25_avg = utils.avg(
                    [x.outdoor_data[3] for x in self.shallow_cache])
                in_temp_avg = utils.avg(
                    [x.indoor_data[0] for x in self.shallow_cache])
                in_humi_avg = utils.avg(
                    [x.indoor_data[1] for x in self.shallow_cache])
                in_pm10_avg = utils.avg(
                    [x.indoor_data[2] for x in self.shallow_cache])
                in_pm25_avg = utils.avg(
                    [x.indoor_data[3] for x in self.shallow_cache])

                avg = HumidexData(
                    (in_temp_avg, in_humi_avg, in_pm10_avg, in_pm25_avg),
                    (out_temp_avg, out_humi_avg, out_pm10_avg, out_pm25_avg))
                avg.timestamp = self.shallow_cache[-1].timestamp
                self.avg_cache.append(avg)
                self.shallow_cache = []
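    # The eight per-field averages above could also be computed in a loop; a
    # sketch assuming indoor_data and outdoor_data keep the same 4-tuple layout
    # (temperature, humidity, pm10, pm25):
    # in_avgs = tuple(utils.avg([x.indoor_data[i] for x in self.shallow_cache])
    #                 for i in range(4))
    # out_avgs = tuple(utils.avg([x.outdoor_data[i] for x in self.shallow_cache])
    #                  for i in range(4))
    # avg = HumidexData(in_avgs, out_avgs)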
Example #6
def summarize(sensor, timeframe, start, end):
    # prepare the database schema to use
    if timeframe == "hour":
        key_to_read = sensor["db_sensor"]
        key_to_write = sensor["db_sensor"] + ":hour"
    elif timeframe == "day":
        key_to_read = sensor["db_sensor"] + ":hour:avg"
        key_to_write = sensor["db_sensor"] + ":day"
    # retrieve from the database the data based on the given timeframe
    data = db.rangebyscore(key_to_read, start, end, withscores=True)
    # split between values and timestamps
    values = []
    timestamps = []
    for i in range(0, len(data)):
        timestamps.append(data[i][0])
        values.append(data[i][1])
    # calculate the derived values
    timestamp = start
    min = avg = max = rate = sum = count = count_unique = "-"
    if "avg" in sensor["summarize"] and sensor["summarize"]["avg"]:
        # calculate avg
        avg = utils.avg(values)
        db.deletebyscore(key_to_write + ":avg", start, end)
        db.set(key_to_write + ":avg", avg, timestamp)
    if "min_max" in sensor["summarize"] and sensor["summarize"]["min_max"]:
        # calculate min
        min = utils.min(values)
        db.deletebyscore(key_to_write + ":min", start, end)
        db.set(key_to_write + ":min", min, timestamp)
        # calculate max
        max = utils.max(values)
        db.deletebyscore(key_to_write + ":max", start, end)
        db.set(key_to_write + ":max", max, timestamp)
    if "rate" in sensor["summarize"] and sensor["summarize"]["rate"]:
        # calculate the rate of change
        rate = utils.velocity(timestamps, values)
        db.deletebyscore(key_to_write + ":rate", start, end)
        db.set(key_to_write + ":rate", rate, timestamp)
    if "sum" in sensor["summarize"] and sensor["summarize"]["sum"]:
        # calculate the sum
        sum = utils.sum(values)
        db.deletebyscore(key_to_write + ":sum", start, end)
        db.set(key_to_write + ":sum", sum, timestamp)
    if "count" in sensor["summarize"] and sensor["summarize"]["count"]:
        # count the values
        count = utils.count(values)
        db.deletebyscore(key_to_write + ":count", start, end)
        db.set(key_to_write + ":count", count, timestamp)
    if "count_unique" in sensor["summarize"] and sensor["summarize"][
            "count_unique"]:
        # count the unique values
        count_unique = utils.count_unique(values)
        db.deletebyscore(key_to_write + ":count_unique", start, end)
        db.set(key_to_write + ":count_unique", count_unique, timestamp)
    log.debug("[" + sensor["module_id"] + "][" + sensor["group_id"] + "][" +
              sensor["sensor_id"] + "] (" + utils.timestamp2date(timestamp) +
              ") updating summary of the " + timeframe +
              " (min,avg,max,rate,sum,count,count_unique): (" + str(min) +
              "," + str(avg) + "," + str(max) + "," + str(rate) + "," +
              str(sum) + "," + str(count) + "," + str(count_unique) + ")")
Example #7
def get_research_methods(triples):
    """
    Get chunks from IOB tags of research methods
    Input: list of triple [word, tag, score] of the paper got from tagger
    Ouput: list of research methods with scores
    """
    tags = ['O', 'B-RS', 'I-RS']
    top = 'O'
    stack_rs = []
    stack_sc = []
    dict_research_method = {}

    # stack to get the research methods from paper
    for i in range(len(triples)):
        if triples[i][1] == 'B-RS':
            if top == 'O':
                stack_rs.append(triples[i][0])
                stack_sc.append(
                    softmax(triples[i][2])[tags.index(triples[i][1])])
                top = 'B-RS'
            else:
                research_method = ' '.join(stack_rs)
                dict_research_method[research_method] = avg(stack_sc)
                stack_rs = [triples[i][0]]
                stack_sc = [triples[i][2]]
                top = 'B-RS'

        elif triples[i][1] == 'I-RS':
            if top == 'O':
                continue
            else:
                stack_rs.append(triples[i][0])
                stack_sc.append(
                    softmax(triples[i][2])[tags.index(triples[i][1])])
                top = 'I-RS'

        else:
            if top == 'O':
                continue
            else:
                research_method = ' '.join(stack_rs)
                dict_research_method[research_method] = avg(stack_sc)
                stack_rs = []
                stack_sc = []
                top = 'O'

    return dict_research_method
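# Hypothetical usage sketch for get_research_methods; the tagger output and the
# softmax helper come from elsewhere in this project, so the values below are
# only illustrative:
# triples = [['linear', 'B-RS', scores1], ['regression', 'I-RS', scores2],
#            ['is', 'O', scores3]]
# get_research_methods(triples)
# -> {'linear regression': <average of the two softmaxed B-RS/I-RS scores>}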
Example #8
    def print_cluster_separation(self):
        print "CLUSTER SEPERATION"
        print
        print "Comparing each Cluster to it's most similar other clusters"

        if len(self.clusters) < 2:
            print "There are less than two clusters"
            return

        cluster_sim_mat = self.confirm.get_cluster_sim_mat()
        for row in cluster_sim_mat:
            row.sort(reverse=True)

        top_1 = list()
        top_3 = list()
        top_5 = list()
        for row in cluster_sim_mat:
            for x, val in enumerate(row):
                if x == 0:
                    continue
                if x <= 1:
                    top_1.append(val)
                if x <= 3:
                    top_3.append(val)
                if x <= 5:
                    top_5.append(val)
                else:
                    break
        top_1.sort(reverse=True)

        top_1_mean = utils.avg(top_1)
        top_1_stddev = utils.stddev(top_1)
        top_3_mean = utils.avg(top_3)
        top_3_stddev = utils.stddev(top_3)
        top_5_mean = utils.avg(top_5)
        top_5_stddev = utils.stddev(top_5)
        print "\n        Mean\t   Std Dev"
        print "Top 1: %3.3f\t %3.3f" % (top_1_mean, top_1_stddev)
        print "Top 3: %3.3f\t %3.3f" % (top_3_mean, top_3_stddev)
        print "Top 5: %3.3f\t %3.3f" % (top_5_mean, top_5_stddev)
        print
        print "List of 10 most similar scores"
        print ", ".join(map(lambda x: "%4.3f" % x, top_1[:10]))

        print
        print
Example #9
	def print_cluster_separation(self):
		print "CLUSTER SEPERATION"
		print
		print "Comparing each Cluster to it's most similar other clusters"

		if len(self.clusters) < 2:
			print "There are less than two clusters"
			return

		cluster_sim_mat = self.confirm.get_cluster_sim_mat()
		for row in cluster_sim_mat:
			row.sort(reverse=True)

		top_1 = list()
		top_3 = list()
		top_5 = list()
		for row in cluster_sim_mat:
			for x, val in enumerate(row):
				if x == 0:
					continue
				if x <= 1:
					top_1.append(val)
				if x <= 3:
					top_3.append(val)
				if x <= 5:
					top_5.append(val)
				else:
					break
		top_1.sort(reverse=True)

		top_1_mean = utils.avg(top_1)
		top_1_stddev = utils.stddev(top_1)
		top_3_mean = utils.avg(top_3)
		top_3_stddev = utils.stddev(top_3)
		top_5_mean = utils.avg(top_5)
		top_5_stddev = utils.stddev(top_5)
		print "\n        Mean\t   Std Dev"
		print "Top 1: %3.3f\t %3.3f" % (top_1_mean, top_1_stddev)
		print "Top 3: %3.3f\t %3.3f" % (top_3_mean, top_3_stddev)
		print "Top 5: %3.3f\t %3.3f" % (top_5_mean, top_5_stddev)
		print
		print "List of 10 most similar scores"
		print ", ".join(map(lambda x: "%4.3f" % x, top_1[:10]))

		print
		print
def report_headers(file3, item):
    cdata = companydata()
    today = datetime.today().strftime('%m/%d/%Y')
    invodate = datetime.today().strftime('%m/%d/%Y')
    ltm, rtm, bump, tb, ctrall, left_ctr, right_ctr, dl, dh, tdl, hls, m1, m2, m3, m4, m5, m6, m7, n1, n2, n3 = reportsettings(
        1)

    dateline = m1 + 8.2 * dl
    mtmp = dateline - 3.5 * dl
    level1 = mtmp + 3.5 * dl

    c = canvas.Canvas(file3, pagesize=letter)
    c.setLineWidth(1)

    c.setFont('Helvetica-Bold', 24, leading=None)
    c.drawCentredString(rtm - 75, dateline + 1.5 * dl, 'Report')
    c.setFont('Helvetica-Bold', 12, leading=None)
    c.drawString(ltm + bump * 3, level1 + bump * 2, f'{item.upper()} Report')
    c.setFont('Helvetica', 12, leading=None)
    c.drawCentredString(rtm - 50, dateline + bump, 'Created')
    #c.drawCentredString(rtm - 37.7, dateline + bump, 'Type')

    vdat = Vehicles.query.filter(Vehicles.DOTNum != None).first()
    dh = 13
    top = level1 - dh
    lft = ltm + bump * 3
    header = list(range(5))
    header[0] = f'This Report Page is the {item.upper()}'
    header[1] = 'Information List for'
    header[2] = f'{cdata[0]}'
    header[3] = f'DOT #{vdat.DOTNum}'
    header[4] = ''
    for ix in header:
        c.drawString(lft, top, ix)
        top = top - dh

    x = avg(rtm - 75, rtm)
    y = dateline - dh - bump
    #c.drawCentredString(x, y, f'{item.upper()}')
    x = avg(rtm - 75, rtm - 150)
    c.drawCentredString(rtm - 50, y, invodate)

    c.showPage()
    c.save()
Example #11
 def _init_user_avg(self):
     """
     Description
         A function which returns the users' average ratings as
         a defaultdict.
     """
     user_avg = defaultdict(int)
     for user in self.users:
         user_avg[user] = avg(self.matrix[user])
     return user_avg
Example #12
 def test_initialization(self):
     dimension = 10
     matrix = [[randint(1, 10) for _i in range(0, dimension)]
               for _c in range(0, dimension)]
     cf = MFExplicitPrepSGD(matrix, lf=4)
     self.assertEqual(len(cf.matrix), dimension)
     elements = [
         element for row in cf.preprocessed_matrix for element in row
     ]
     self.assertAlmostEqual(avg(elements), 0, delta=0.00001)
Example #13
 def _init_item_avg(self):
     """
     Description
         A function which returns the items' average ratings as
         a defaultdict.
     """
     item_avg = defaultdict(int)
     for item in self.items:
         item_avg[item] = avg(self.matrix.col(item))
     return item_avg
Example #14
 def _init_avg_ratings(self):
     """
     Description
         A function which computes and returns users'
         average ratings.
     """
     avg_r = DynamicArray(default_value=lambda: 0)
     for index, user in enumerate(self.matrix):
         avg_r[index] = avg(user)
     return avg_r
Example #15
def simulate(net, config):
    robot, ball, goal = reset()
    global fitness, total_steps, MAX_STEPS, reset_sim
    for step in range(MAX_STEPS):
        # calculate new net inputs
        rotated_center = Vec2d(10.5, 0)
        rotated_center.rotate(robot.angle)
        rotated_center += robot.position

        goal_pos = utils.avg(goal.a, goal.b)
        ball_dist = utils.dist(rotated_center,
                               ball.position)  # robot -> ball dist
        goal_dist = utils.dist(ball.position, goal_pos)  # ball -> goal dist
        ball_dir, goal_dir = utils.get_angles(
            rotated_center, ball.position, goal_pos)  # inputs for neural net
        fitness = utils.calculate_fitness(ball_dist, goal_dist,
                                          robot_touched_ball)

        # scale values for nn
        #ball_dir = np.interp(ball_dir, [0, 360], [0.0, 1.0])
        #goal_dir = np.interp(goal_dir, [0, 360], [0.0, 1.0])
        #ball_dist = np.interp(ball_dist, [0, 303.6], [0.0, 1.0])

        # get input from neural net here; need to calculate ball_dir and goal_dir though
        # NEW INPUTS SHOULD BE: fixed ball dir, fixed ball dist, fixed goal direction, fixed goal distance
        # need to subtract the current heading from the ball dir
        # goal dir should be robot to goal not bloody ball to goal
        # remove int touched ball
        # something else here?
        rotation, speed = net.activate(
            [ball_dir, ball_dist, goal_dir,
             int(robot_touched_ball)])
        rotation = utils.clamp(rotation, -1.0, 1.0)
        speed = utils.clamp(speed, -1.0, 1.0)
        rotation *= 10  # rotation will be in degrees
        speed *= 50  # max speed = 60

        robot.angle += math.radians(rotation)
        robot.velocity = (speed * math.cos(robot.angle - 1.5708),
                          speed * math.sin(robot.angle - 1.5708))

        # step sim based on input
        robot.angular_velocity = 0
        robot.center_of_gravity = (10.5, 10.5)
        space.step(1.0 / 60.0)

        total_steps += 1

        # session was ended from one of the callback listeners, so we know it's got the bonuses already
        if reset_sim:
            reset_sim = False
            return fitness

    # test failed to complete, still subtract total steps
    return fitness - (total_steps / 1.5) + total_steps_touching_ball
def percent_success(actions):
    """Finds the percent of times didSucceed is true in a list of actions.

    actions is the list of actions that can either succeed or fail."""
    successes = [action.get('didSucceed') for action in actions]
    # Returns the integer percentage of times in successes that
    # didSucceed is true. Taking an average of a list of booleans
    # returns a float between 0 and 1 of what percentage of times the
    # value was True.
    # Example: [True, True, False, True] returns 75.
    return round(100 * utils.avg(successes))
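# Usage sketch (assuming utils.avg is a plain arithmetic mean, as in the note
# near the top of this collection), mirroring the docstring example:
# percent_success([{'didSucceed': True}, {'didSucceed': True},
#                  {'didSucceed': False}, {'didSucceed': True}])  # -> 75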
Example #17
    def feature_eval_metrics(self, sim_fun):
        doc_cluster_sims_flat = list()
        doc_cluster_means = list()
        doc_cluster_std_devs = list()
        for cluster in self.clusters:
            cluster_sims = list()
            for _doc in cluster.members:
                val = sim_fun(cluster, _doc)
                doc_cluster_sims_flat.append(val)
                cluster_sims.append(val)
            doc_cluster_means.append(utils.avg(cluster_sims))
            doc_cluster_std_devs.append(utils.stddev(cluster_sims))
        global_mean = utils.avg(doc_cluster_sims_flat)
        global_stddev = utils.stddev(doc_cluster_sims_flat)
        mean_of_means = utils.avg(doc_cluster_means)
        stddev_of_means = utils.stddev(doc_cluster_means)
        mean_of_stddev = utils.avg(doc_cluster_std_devs)
        stddev_of_stddev = utils.stddev(doc_cluster_std_devs)

        return global_mean, global_stddev, mean_of_means, stddev_of_means, mean_of_stddev, stddev_of_stddev
Example #18
	def feature_eval_metrics(self, sim_fun):
		doc_cluster_sims_flat = list()
		doc_cluster_means = list()
		doc_cluster_std_devs = list()
		for cluster in self.clusters:
			cluster_sims = list()
			for _doc in cluster.members:
				val = sim_fun(cluster, _doc)
				doc_cluster_sims_flat.append(val)
				cluster_sims.append(val)
			doc_cluster_means.append(utils.avg(cluster_sims))
			doc_cluster_std_devs.append(utils.stddev(cluster_sims))
		global_mean = utils.avg(doc_cluster_sims_flat)
		global_stddev = utils.stddev(doc_cluster_sims_flat)
		mean_of_means = utils.avg(doc_cluster_means)
		stddev_of_means = utils.stddev(doc_cluster_means)
		mean_of_stddev = utils.avg(doc_cluster_std_devs)
		stddev_of_stddev = utils.stddev(doc_cluster_std_devs)

		return global_mean, global_stddev, mean_of_means, stddev_of_means, mean_of_stddev, stddev_of_stddev
Example #19
	def print_cluster_cohesion(self):
		print "CLUSTER COHESION:"
		sim_names = self.clusters[0].members[0].get_feature_set_names()[:]
		sim_names.append("confirm")
		print "\t\t%s     SIZE" % ("        ".join(sim_names))
		for x, cluster in enumerate(self.clusters):
			# list of lists
			similarities = map(lambda _doc: _doc.global_sim(cluster.center), cluster.members)
			to_print = list()
			for y in xrange(len(similarities[0])):
				values = map(lambda row: row[y], similarities)
				to_print.append(utils.avg(values))
				to_print.append(utils.stddev(values))
			values = map(lambda _doc: self.confirm.cluster_doc_similarity(cluster, _doc), cluster.members)
			to_print.append(utils.avg(values))
			to_print.append(utils.stddev(values))
			l = len(cluster.members)
			print "\t%s:  %s  %d" % (x, "  ".join(map(lambda s: "%3.2f" % s, to_print)), l)
		print
		print
Example #20
    def update(self, task_info, new_runtime):
        func = task_info['function_id']
        end = task_info['endpoint_id']
        group = self.endpoints[end]['group']

        while len(self.runtimes[func][group].queue) > self.last_n:
            self.runtimes[func][group].get()
        self.runtimes[func][group].put(new_runtime)
        self.avg_runtime[func][group] = avg(self.runtimes[func][group])

        self.num_executions[func][group] += 1
Example #21
def calculate_avg_cycle_time(cycles):
    """Calculates the average time for an action based on start and end times.

    Finds the time difference between each action pair passed and
    returns the average of the differences.

    cycles is a list of tuples where the first action in the tuple is
    the intake, and the second item is the placement or drop."""
    cycle_times = []
    for cycle in cycles:
        cycle_times.append(cycle[0].get('time') - cycle[1].get('time'))
    return utils.avg(cycle_times, None)
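# Usage sketch, assuming timestamps count down during a match (so the intake
# time is larger than the placement time); the dicts below are hypothetical:
# cycles = [({'time': 120.0}, {'time': 112.0}), ({'time': 90.0}, {'time': 84.0})]
# calculate_avg_cycle_time(cycles)  # -> 7.0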
 def get_speed(self, condition, sub_name):
     """
     >>> d = get_simple_test_dir() + os.sep + "test_network1"
     >>> r = ReadReports(d)
     >>> info = GetProcessedInformation(r)
     >>> info.get_speed("normal","singles") == [4.75, 4, 5]
     True
     """
     # {'host7': 5, 'host6': 4, 'host5': 5, 'host4': 5, 'host3': 4, 'host2': 5, 'host1': 5, 'host8': 5}
     null_io = self.information.get_last_non_null_io(condition, sub_name)
     # print self.information.get_null_io(condition, sub_name)
     l = simple_dict_to_list(null_io)
     return [avg(l), min(l), max(l)]
Example #23
 def saving():
     begin_time = datetime.now() - dt
     round_time = begin_time.replace(
         minute=30 * (begin_time.minute // 30)).strftime('%H:%M')
     try:
         kits = kitchens_at(round_time)
     except MissingDBTimeError:
         return
     new_kits = []
     for kit in kits:
         if kit.name == 'Cadillac':  # Only Cadillac is live for now, do not update the rest
             dkit = kit._asdict()
             dkit['people'] = avg(people)
             dkit['fruits'] = {
                 fruit: avg(values)
                 for fruit, values in fruits_history.items()
             }
             new_kits.append(Kitchen(**dkit))
     kitchens_add_history(LiveInfo(round_time, new_kits))
     people.clear()
     for fruit in fruits_history.values():
         fruit.clear()
def consolidate_nums(nums):
    """Given numbers reported by multiple scouts, estimates actual number

    nums is a list of numbers, representing action counts or times, reported by each scout
    Currently tries to consolidate using only the reports from scouts on one robot,
    but future improvements might change the algorithm to account for other alliance members,
    since TBA can give us the total action counts for the alliance
    """
    mean = utils.avg(nums)
    if mean in nums or len(nums) == 0:
        # Avoid getting a divide by zero error when calculating standard deviation
        return round(mean)
    # If two or more scouts agree, automatically go with what they say
    if len(nums) > len(set(nums)):
        # Still need to consolidate, in case there are multiple modes
        return consolidate_nums(modes(nums))
    # Population standard deviation:
    std_dev = statistics.pstdev(nums)
    # Calculate weighted average, where the weight for each num is its reciprocal square z-score
    # That way, we account less for data farther from the mean
    z_scores = [(num - mean) / std_dev for num in nums]
    weights = [1 / z**2 for z in z_scores]
    float_nums = utils.avg(nums, weights)
    return round(float_nums)
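# Usage sketch with hypothetical scout reports (assumes the modes helper
# returns the list of most common values):
# consolidate_nums([3])        # mean equals the single report       -> 3
# consolidate_nums([4, 4, 7])  # two scouts agree, the mode wins     -> 4
# consolidate_nums([2, 4, 9])  # no agreement: z-score-weighted mean, rounded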
Example #25
def history():
    """
    AVG availability of seats overall for a time interval - use for a graph
    :return: [{timeInterval: '2:30', avg:100}]
    """
    live_data = people_counting.get_current()
    kitchens_avg = {
        hour: avg([koch.empty_seats for koch in kitchens_at(hour)])
        for hour in cfg.hours
    }
    if live_data.time in cfg.hours:
        kitchens_avg.update({
            live_data.time:
            avg([koch.empty_seats for koch in live_data.kitchens])
        })

    return jsonify({
        'time':
        live_data.time,
        'kitchens': [{
            'time': time,
            'avg': kitchens_avg
        } for time, kitchens_avg in kitchens_avg.items()]
    })
Example #26
 def print_cluster_cohesion(self):
     print "CLUSTER COHESION:"
     sim_names = self.clusters[0].members[0].get_feature_set_names()[:]
     sim_names.append("confirm")
     print "\t\t%s     SIZE" % ("        ".join(sim_names))
     for x, cluster in enumerate(self.clusters):
         # list of lists
         similarities = map(lambda _doc: _doc.global_sim(cluster.center),
                            cluster.members)
         to_print = list()
         for y in xrange(len(similarities[0])):
             values = map(lambda row: row[y], similarities)
             to_print.append(utils.avg(values))
             to_print.append(utils.stddev(values))
         values = map(
             lambda _doc: self.confirm.cluster_doc_similarity(
                 cluster, _doc), cluster.members)
         to_print.append(utils.avg(values))
         to_print.append(utils.stddev(values))
         l = len(cluster.members)
         print "\t%s:  %s  %d" % (x, "  ".join(
             map(lambda s: "%3.2f" % s, to_print)), l)
     print
     print
Example #27
    def predict(self, user_id, item_id):
        """
        Description:
            Returns a postprocessed prediction of a rating.

        Arguments:
            :param user_id: The user identifier.
            :type user_id: int
            :param item_id: The item identifier.
            :type item_id: int
        """
        if self.matrix[user_id][item_id] is None:
            nbs = self.neighborhood_of(user_id)
            nbs_ratings = [self.matrix[u_id][item_id] for u_id in nbs]
            return avg(nbs_ratings)
        else:
            return self.matrix[user_id][item_id]
Example #28
    def recommend(self, user_id, n_rec):
        """
        Description
            A function which returns recommendations for a user.

        Arguments
            :param user_id: The user identifier.
            :type user_id: int
            :param n_rec: The number of items to recommend.
            :type n_rec: int
        """
        item_ids = [i for i in range(0, len(self.matrix[user_id]))
                    if self.matrix[user_id][i] is None]
        nbs = self.neighborhood_of(user_id)
        nbs_predictions = {
            i: [self.predict(n, i) for n in nbs] for i in item_ids}
        predictions = {
            key: avg(nbs_predictions[key]) for key in nbs_predictions}
        return sorted(
            item_ids,
            key=lambda item_id: predictions[item_id])[:-n_rec]
Example #29
def history_saver():
    from kitchen_detection.people_counting import LiveInfo
    from statistics import mode

    people = []
    fruits_history = {fruit: [] for fruit in cfg.fruit_types}
    dt = timedelta(minutes=30)

    def saving():
        begin_time = datetime.now() - dt
        round_time = begin_time.replace(
            minute=30 * (begin_time.minute // 30)).strftime('%H:%M')
        try:
            kits = kitchens_at(round_time)
        except MissingDBTimeError:
            return
        new_kits = []
        for kit in kits:
            if kit.name == 'Cadillac':  # Only Cadillac is live for now, do not update the rest
                dkit = kit._asdict()
                dkit['people'] = avg(people)
                dkit['fruits'] = {
                    fruit: avg(values)
                    for fruit, values in fruits_history.items()
                }
                new_kits.append(Kitchen(**dkit))
        kitchens_add_history(LiveInfo(round_time, new_kits))
        people.clear()
        for fruit in fruits_history.values():
            fruit.clear()

    schedule.every().minute.at(":30").do(saving)
    while True:
        people.append(mode(people_buffer))
        for fruit, history in fruits_history.items():
            history.append(avg(fruit_buffer[fruit]))
        schedule.run_pending()
        sleep(60)
Example #30
def get_current() -> LiveInfo:
    kitchens = []
    for kitchen in cfg.kitchens:
        # Only Cadillac is live now, fake the rest
        if kitchen['name'] == 'Cadillac':
            people = mode(kitchen_detection.people_live.people_buffer)
            fruits = {
                fruit: avg(history)
                for fruit, history in
                kitchen_detection.people_live.fruit_buffer.items()
            }
        else:
            people = kitchen['seats'] / 2
            fruits = {'apple': 1}

        kitchens.append(
            Kitchen(kitchen['name'], kitchen['seats'], kitchen['floor'],
                    people, fruits))

    begin_time = datetime.now()
    round_time = begin_time.replace(
        minute=30 * (begin_time.minute // 30)).strftime('%H:%M')
    return LiveInfo(round_time, kitchens)
def calculate_obj_team(team):
    """Calculate data for given team using objective calculated TIMs"""
    team_info = {}
    # list of TIMs that the team has been in:
    tims = local_database_communicator.read_dataset('processed.calc_obj_tim', team_number=team)
    # Calculate averages
    for calculation, schema in SCHEMA['averages'].items():
        # Find tims that meet required data field:
        tim_action_counts = []
        for tim in tims:
            # Gets the total number of actions for a single tim
            tim_action_counts.append(sum(
                [tim[tim_field] for tim_field in schema['tim_fields']]))
        if schema['type'] in ['int', 'float']:
            average = utils.avg(tim_action_counts)
            average = STR_TYPES[schema['type']](average)
        else:
            raise TypeError(f'{calculation} should be a number in calc obj team schema')
        team_info[calculation] = average
    # Calculate counts
    for calculation, schema in SCHEMA['counts'].items():
        tims_that_meet_filter = tims
        for key, value in schema['tim_fields'].items():
            if key == 'not':
                # not_field expects the output to be anything but the given filter
                # not_value is the filter that not_field shouldn't have
                for not_field, not_value in value.items():
                    # Checks that the TIMs in the 'not' field are anything other than the filter
                    tims_that_meet_filter = list(filter(lambda tim: tim.get(
                        not_field, not_value) != not_value, tims_that_meet_filter))
            else:
                # Checks that the TIMs in their given field meet the filter
                tims_that_meet_filter = list(filter(
                    lambda tim: tim[key] == value, tims_that_meet_filter))
        team_info[calculation] = STR_TYPES[schema['type']](len(tims_that_meet_filter))
    return team_info
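# Hedged sketch of the SCHEMA dict shape that calculate_obj_team expects; the
# calculation and field names below are illustrative, not the project's real
# schema file:
# SCHEMA = {
#     'averages': {
#         'avg_balls_scored': {'type': 'float',
#                              'tim_fields': ['balls_low', 'balls_high']},
#     },
#     'counts': {
#         'matches_incap': {'type': 'int', 'tim_fields': {'incap': True}},
#     },
# }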
Example #32
    def test_optimality(self,
                        learner,
                        partition='train',
                        debug_idxs=None,
                        skip_idxs=(),
                        decoder='lp+mst',
                        streaming=False,
                        overwritten_params=(),
                        **kwargs):
        """Note the proportion of optimal solutions when approximating.
        """
        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder=decoder,
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        approx_token_solns, approx_dep_solns = [], []
        for instance in eval_instances:
            if instance.decoder.has_solution():
                approx_token_solns.append(
                    [tuple(idx) for idx in instance.output_idxs])
                approx_dep_solns.append(
                    instance.get_dep_tuples(instance.output_sent,
                                            parse_type='outtree'))
            else:
                approx_token_solns.append([])
                approx_dep_solns.append([])
            del instance.decoder

        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder='ilp',
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        exact_token_solns, exact_dep_solns = [], []
        for instance in eval_instances:
            if instance.decoder.has_solution():
                exact_token_solns.append(
                    [tuple(idx) for idx in instance.output_idxs])
                exact_dep_solns.append(
                    instance.get_dep_tuples(instance.output_sent,
                                            parse_type='outtree'))
            else:
                exact_token_solns.append([])
                exact_dep_solns.append([])

        token_optimality, dep_optimality = [], []
        num_correct_tokens, num_total_tokens = [], []
        num_correct_deps, num_total_deps = [], []
        num_failed_approx, num_failed_exact, num_succeeded = 0, 0, 0
        for approx_tokens, approx_deps, exact_tokens, exact_deps in zip(
                approx_token_solns, approx_dep_solns, exact_token_solns,
                exact_dep_solns):
            if len(approx_tokens) == 0:
                num_failed_approx += 1
            if len(exact_tokens) == 0:
                num_failed_exact += 1
            if len(approx_tokens) == 0 or len(exact_tokens) == 0:
                continue
            else:
                num_succeeded += 1

            assert len(approx_tokens) == len(exact_tokens)

            token_overlap = set(approx_tokens).intersection(exact_tokens)
            token_optimality.append(
                int(len(token_overlap) == len(exact_tokens)))
            num_correct_tokens.append(len(token_overlap))
            num_total_tokens.append(len(exact_tokens))

            dep_overlap = set(approx_deps).intersection(exact_deps)
            dep_optimality.append(int(len(dep_overlap) == len(exact_deps)))
            num_correct_deps.append(len(dep_overlap))
            num_total_deps.append(len(exact_deps))

        print "%d/%d (%.1f%%) optimal token solutions%s" % \
                (sum(token_optimality),
                 num_succeeded,
                 avg(token_optimality) * 100,
                 "; %d approx failed, %d exact failed" % \
                         (num_failed_approx, num_failed_exact)
                         if num_succeeded < len(eval_instances) else "")
        print "token optimality rate: %.1f%% over %d instances, " \
                                     "%.1f%% over %d tokens" % \
                (avg(correct/total * 100
                    for correct, total in zip(num_correct_tokens,
                                              num_total_tokens)),
                 num_succeeded,
                 sum(num_correct_tokens)/sum(num_total_tokens) * 100,
                 sum(num_total_tokens))
        print
        print "%d/%d (%.1f%%) optimal dep solutions%s" % \
                (sum(dep_optimality),
                 num_succeeded,
                 avg(dep_optimality) * 100,
                 "; %d approx failed, %d exact failed" % \
                         (num_failed_approx, num_failed_exact)
                         if num_succeeded < len(eval_instances) else "")
        print "dep optimality rate: %.1f%% over %d instances, " \
                                     "%.1f%% over %d deps" % \
                (avg(correct/total * 100
                    for correct, total in zip(num_correct_deps,
                                              num_total_deps)),
                 num_succeeded,
                 sum(num_correct_deps)/sum(num_total_deps) * 100,
                 sum(num_total_deps))
Example #33
    def test_tightness(self, learner, partition='train', debug_idxs=None,
            skip_idxs=(), decoder='ilp', streaming=False,
            overwritten_params=(),**kwargs):
        """Note the proportion of integral solutions to LPs.
        """
        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder='ilp',
                                               relax=True,
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        print "idx\tsize\twords\toptwrds\tequiv?\tdeps\toptdeps"
        num_failed, num_tight, num_loose = 0, 0, 0
        token_tightness, dep_tightness = [], []
        for i, instance in enumerate(eval_instances):
            if not instance.decoder.has_solution():
                num_failed += 1
            elif instance.decoder.has_integral_solution(ndigits=3):
                num_tight += 1
                token_tightness.append(1)
                dep_tightness.append(1)
            else:
                num_loose += 1

                # Print some additional statistics for the loose ones
                relaxed = instance.decoder.get_integrality()
                instance.decoder.solve(relax=False)
                optimal = instance.decoder.get_integrality()

                # Ensure that the optimal result is integral for sanity
                for var_type in optimal:
                    assert len(optimal[var_type][0]) == 0

                # Check whether the relaxed word solution is the same as the
                # optimal solution, even if it's non-integral
                is_equiv = sorted([var_tuple[0].idx() for var_tuple in
                                  relaxed['word'][0] + relaxed['word'][1]]) \
                    == sorted([var_tuple[0].idx() for var_tuple in
                              optimal['word'][1]])

                print "%d:\t%d\t%d/%d\t%d\t%s\t%d/%d\t%d" % \
                        (i,
                         sum(sent.length for sent in instance.input_sents),
                         len(relaxed['word'][0]),
                         len(relaxed['word'][0]) + len(relaxed['word'][1]),
                         len(optimal['word'][1]),
                         '' if is_equiv else '!',
                         len(relaxed['dep'][0]),
                         len(relaxed['dep'][0]) + len(relaxed['dep'][1]),
                         len(optimal['dep'][1]),
                         )

                token_tightness.append(len(relaxed['word'][1]) / \
                        (len(relaxed['word'][0]) + len(relaxed['word'][1])))
                dep_tightness.append(len(relaxed['dep'][1]) / \
                        (len(relaxed['dep'][0]) + len(relaxed['dep'][1])))

                # If restricted to a few instances, print the details
                if len(eval_instances) < 3:
                    for feat_cat in ('word', 'dep'):
                        for integrality in (0,1):
                            for rel_tuple in relaxed[feat_cat][integrality]:
                                var, relaxed_value = rel_tuple
                                optimal_value = None
                                for opt_tuple in optimal[feat_cat][1]:
                                    if opt_tuple[0].idx() == var.idx():
                                        optimal_value = opt_tuple[1]
                                        break
                                print "%s\t%.3f\t%s" % \
                                        (var.readable_grounding(),
                                         relaxed_value, optimal_value)

        print "%d/%d (%.1f%%) integral solutions%s" % \
                (num_tight,
                 num_tight + num_loose,
                 (num_tight * 100) / float(num_tight + num_loose),
                 "; %d failed" % (num_failed,) if num_failed > 0 else "")
        print "token integrality rate: %.1f%%" % (avg(token_tightness) * 100,)
        print "dep integrality rate: %.1f%%" % (avg(dep_tightness) * 100,)
Example #34
    def test_tightness(self,
                       learner,
                       partition='train',
                       debug_idxs=None,
                       skip_idxs=(),
                       decoder='ilp',
                       streaming=False,
                       overwritten_params=(),
                       **kwargs):
        """Note the proportion of integral solutions to LPs.
        """
        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder='ilp',
                                               relax=True,
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        print "idx\tsize\twords\toptwrds\tequiv?\tdeps\toptdeps"
        num_failed, num_tight, num_loose = 0, 0, 0
        token_tightness, dep_tightness = [], []
        for i, instance in enumerate(eval_instances):
            if not instance.decoder.has_solution():
                num_failed += 1
            elif instance.decoder.has_integral_solution(ndigits=3):
                num_tight += 1
                token_tightness.append(1)
                dep_tightness.append(1)
            else:
                num_loose += 1

                # Print some additional statistics for the loose ones
                relaxed = instance.decoder.get_integrality()
                instance.decoder.solve(relax=False)
                optimal = instance.decoder.get_integrality()

                # Ensure that the optimal result is integral for sanity
                for var_type in optimal:
                    assert len(optimal[var_type][0]) == 0

                # Check whether the relaxed word solution is the same as the
                # optimal solution, even if it's non-integral
                is_equiv = sorted([var_tuple[0].idx() for var_tuple in
                                  relaxed['word'][0] + relaxed['word'][1]]) \
                    == sorted([var_tuple[0].idx() for var_tuple in
                              optimal['word'][1]])

                print "%d:\t%d\t%d/%d\t%d\t%s\t%d/%d\t%d" % \
                        (i,
                         sum(sent.length for sent in instance.input_sents),
                         len(relaxed['word'][0]),
                         len(relaxed['word'][0]) + len(relaxed['word'][1]),
                         len(optimal['word'][1]),
                         '' if is_equiv else '!',
                         len(relaxed['dep'][0]),
                         len(relaxed['dep'][0]) + len(relaxed['dep'][1]),
                         len(optimal['dep'][1]),
                         )

                token_tightness.append(len(relaxed['word'][1]) / \
                        (len(relaxed['word'][0]) + len(relaxed['word'][1])))
                dep_tightness.append(len(relaxed['dep'][1]) / \
                        (len(relaxed['dep'][0]) + len(relaxed['dep'][1])))

                # If restricted to a few instances, print the details
                if len(eval_instances) < 3:
                    for feat_cat in ('word', 'dep'):
                        for integrality in (0, 1):
                            for rel_tuple in relaxed[feat_cat][integrality]:
                                var, relaxed_value = rel_tuple
                                optimal_value = None
                                for opt_tuple in optimal[feat_cat][1]:
                                    if opt_tuple[0].idx() == var.idx():
                                        optimal_value = opt_tuple[1]
                                        break
                                print "%s\t%.3f\t%s" % \
                                        (var.readable_grounding(),
                                         relaxed_value, optimal_value)

        print "%d/%d (%.1f%%) integral solutions%s" % \
                (num_tight,
                 num_tight + num_loose,
                 (num_tight * 100) / float(num_tight + num_loose),
                 "; %d failed" % (num_failed,) if num_failed > 0 else "")
        print "token integrality rate: %.1f%%" % (avg(token_tightness) * 100, )
        print "dep integrality rate: %.1f%%" % (avg(dep_tightness) * 100, )
Example #35
    def test_optimality(self, learner, partition='train', debug_idxs=None,
            skip_idxs=(), decoder='lp+mst', streaming=False,
            overwritten_params=(),**kwargs):
        """Note the proportion of optimal solutions when approximating.
        """
        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder=decoder,
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        approx_token_solns, approx_dep_solns = [], []
        for instance in eval_instances:
            if instance.decoder.has_solution():
                approx_token_solns.append([tuple(idx)
                                        for idx in instance.output_idxs])
                approx_dep_solns.append(instance.get_dep_tuples(
                                        instance.output_sent,
                                        parse_type='outtree'))
            else:
                approx_token_solns.append([])
                approx_dep_solns.append([])
            del instance.decoder

        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder='ilp',
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        exact_token_solns, exact_dep_solns = [], []
        for instance in eval_instances:
            if instance.decoder.has_solution():
                exact_token_solns.append([tuple(idx)
                                        for idx in instance.output_idxs])
                exact_dep_solns.append(instance.get_dep_tuples(
                                        instance.output_sent,
                                        parse_type='outtree'))
            else:
                exact_token_solns.append([])
                exact_dep_solns.append([])

        token_optimality, dep_optimality = [], []
        num_correct_tokens, num_total_tokens = [], []
        num_correct_deps, num_total_deps = [], []
        num_failed_approx, num_failed_exact, num_succeeded = 0, 0, 0
        for approx_tokens, approx_deps, exact_tokens, exact_deps in zip(
                approx_token_solns, approx_dep_solns,
                exact_token_solns, exact_dep_solns):
            if len(approx_tokens) == 0:
                num_failed_approx += 1
            if len(exact_tokens) == 0:
                num_failed_exact += 1
            if len(approx_tokens) == 0 or len(exact_tokens) == 0:
                continue
            else:
                num_succeeded += 1

            assert len(approx_tokens) == len(exact_tokens)

            token_overlap = set(approx_tokens).intersection(exact_tokens)
            token_optimality.append(
                    int(len(token_overlap) == len(exact_tokens)))
            num_correct_tokens.append(len(token_overlap))
            num_total_tokens.append(len(exact_tokens))

            dep_overlap = set(approx_deps).intersection(exact_deps)
            dep_optimality.append(int(len(dep_overlap) == len(exact_deps)))
            num_correct_deps.append(len(dep_overlap))
            num_total_deps.append(len(exact_deps))

        print "%d/%d (%.1f%%) optimal token solutions%s" % \
                (sum(token_optimality),
                 num_succeeded,
                 avg(token_optimality) * 100,
                 "; %d approx failed, %d exact failed" % \
                         (num_failed_approx, num_failed_exact)
                         if num_succeeded < len(eval_instances) else "")
        print "token optimality rate: %.1f%% over %d instances, " \
                                     "%.1f%% over %d tokens" % \
                (avg(correct/total * 100
                    for correct, total in zip(num_correct_tokens,
                                              num_total_tokens)),
                 num_succeeded,
                 sum(num_correct_tokens)/sum(num_total_tokens) * 100,
                 sum(num_total_tokens))
        print
        print "%d/%d (%.1f%%) optimal dep solutions%s" % \
                (sum(dep_optimality),
                 num_succeeded,
                 avg(dep_optimality) * 100,
                 "; %d approx failed, %d exact failed" % \
                         (num_failed_approx, num_failed_exact)
                         if num_succeeded < len(eval_instances) else "")
        print "dep optimality rate: %.1f%% over %d instances, " \
                                     "%.1f%% over %d deps" % \
                (avg(correct/total * 100
                    for correct, total in zip(num_correct_deps,
                                              num_total_deps)),
                 num_succeeded,
                 sum(num_correct_deps)/sum(num_total_deps) * 100,
                 sum(num_total_deps))
Example #36
    def check_dep_coverage(self,
                           partition='train',
                           debug_idxs=None,
                           skip_idxs=(),
                           var_conf=None):
        """Record the fraction of potential arcs that are present in gold
        trees.
        """
        var_flags = variables.TransductionVariables.parse_var_conf(var_conf)
        tgt_instances = self.get_instances(partition=partition,
                                           debug_idxs=debug_idxs,
                                           skip_idxs=skip_idxs)
        prev_average_overlap = None
        print "ancestor_limit\tavg_overlap_rate\tavg_reachability"
        for ancestor_limit in range(30):
            ancestor_limit = None if ancestor_limit == 0 \
                                  else ancestor_limit
            instance_overlaps = []
            instance_reachability = []

            for instance in tgt_instances:
                # TODO: merge with instance.get_overlap()
                sent_dep_tuples = instance.get_constrained_dep_tuples(
                    instance.sentences[0],
                    original_tree=var_flags['orig_deps'],
                    ancestor_dags=var_flags['anc_deps'],
                    pos_matching=var_flags['pos_deps'],
                    noninverted_deps=var_flags['noninv_deps'],
                    fixed_root=var_flags['fixed_root'],
                    verb_root=var_flags['verb_root'],
                    ancestor_limit=ancestor_limit)
                gold_overlaps = []
                for gold_sent in instance.gold_sentences:
                    gold_dep_tuples = instance.get_dep_tuples(
                        gold_sent, parse_type='dparse')
                    gold_dep_tuple_set = set(gold_dep_tuples)
                    overlap = gold_dep_tuple_set.intersection(sent_dep_tuples)
                    gold_overlaps.append(len(overlap) / \
                                         len(gold_dep_tuple_set))

#                    if len(overlap) < len(gold_dep_tuple_set):
#                        print instance.get_display_string()
#                        print gold_dep_tuple_set - overlap

                instance_overlaps.append(avg(gold_overlaps))
                instance_reachability.append(int(min(gold_overlaps) == 1))

            average_overlap = avg(instance_overlaps)
            average_reachability = avg(instance_reachability)
            if average_overlap == prev_average_overlap:
                continue
            prev_average_overlap = average_overlap

            print ancestor_limit, '\t\t', average_overlap,
            print '\t\t', average_reachability
            print '\t\t\t', sum(instance_overlaps),
            print '\t\t', sum(instance_reachability)
            print '\t\t\t', len(instance_overlaps),
            print '\t\t', len(instance_reachability)
            if not var_flags['anc_deps'] or \
                    (ancestor_limit > 0 and average_overlap == 1.0):
                break
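# NOTE: variables.TransductionVariables.parse_var_conf() is not shown in this
# collection; check_dep_coverage() only assumes it returns a dict of boolean
# constraint flags. A hypothetical example of that shape, with each key named
# after the keyword argument it feeds above:
example_var_flags = {
    'orig_deps': True,     # passed as original_tree
    'anc_deps': True,      # passed as ancestor_dags
    'pos_deps': False,     # passed as pos_matching
    'noninv_deps': False,  # passed as noninverted_deps
    'fixed_root': True,    # passed as fixed_root
    'verb_root': False,    # passed as verb_root
}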
Beispiel #37
0
def plot_intervals(output_folder):
    from parsers import CVOutputParser
    from preprocessing import Preprocessor
    from utils import avg
    import os
    import math
    """ 
    Given a cross validation ouput. Certain triple intervals can be plottet
    to compare the error for extrapolation, max ent and the heurestic.
    
    The algorithm runs through each triple interval, and then for each sampled estiamte output
    the triples in the interval are looked up in each sample and the MAPE error is 
    recorded and the average errors are added. And the average of these averages
    are then plottet for each interval.

    """
    if not output_folder[-1] == '/':
        output_folder += '/'
    intervals = 30
    triple_intervals = Preprocessor.triple_intervals(output_folder + 'observed_frequent_items.out', intervals=intervals)

    avg_max_ent_errors = []
    avg_ext_errors = []
    avg_heu_errors = []
    pair_triple_ratios = [i/10. for i in range(11)] # binned ratios [0.0 to 1.0]
    max_ent_ratio_error = [0 for i in range(11)]
    ext_ratio_error = [0 for i in range(11)]

    for index, triple_interval in enumerate(triple_intervals):
        print 'Triple interval {} of {}'.format(index, intervals)
        iteration = 0
        MAPE_avg_errors = []
        MAPE_avg_errors_ext = []
        # MAPE_avg_errors_heu = []
        while True:
            max_ent_est_file = output_folder + str(iteration) + '_data.tsv'
            ext_est_file = output_folder + str(iteration) + '_data_extrapolation.tsv'
            # heu_est_file = output_folder + str(iteration) + '_data_heurestic.tsv'
            # read baseline also?
            # Read until we do not find an output file
            if not os.path.exists(max_ent_est_file):
                break

            max_ent_est = CVOutputParser.read_est_obs_file(max_ent_est_file)
            ext_est = CVOutputParser.read_est_obs_file(ext_est_file)
            # heu_est = CVOutputParser.read_est_obs_file(heu_est_file)

            MAPE_errors = []
            MAPE_errors_ext = []
            # MAPE_errors_heu = []

            for triple in triple_interval:
                # Check that the triple has been estimated
                if triple in max_ent_est:

                    # Index 1 should hold the observed value parsed from the
                    # file; it is the same for every estimate, so just read it once.
                    obs = max_ent_est[triple][1]

                    # maxent estimate
                    est = max_ent_est[triple][0]

                    # extrapolation estimate
                    est2 = ext_est[triple][0]

                    # # independence estimate?

                    # heuristic: use max_ent for triples that occur 0 times in the sample
                    # est4 = heu_est[triple][0]

                    # Index 2 should hold the pair-triple ratio;
                    # it is the same for every estimate.
                    ratio = max_ent_est[triple][2]
                    # bin the ratio to one decimal
                    ratio_binned = round(ratio, 1)
                    # add errors to the ratio
                    max_ent_ratio_error[pair_triple_ratios.index(ratio_binned)] += abs(est-obs) / float(obs)
                    ext_ratio_error[pair_triple_ratios.index(ratio_binned)] += abs(est2-obs) / float(obs)


                    # MAPE error, max ent
                    error = abs(obs - est) / float(obs) * 100
                    MAPE_errors.append(error)

                    # MAPE error, extrapolation
                    error2 = abs(obs - est2) / float(obs) * 100
                    MAPE_errors_ext.append(error2)

                    # MAPE error independence?

                    # MAPE error, heuristic (needs the commented heu_est above)
                    # error4 = abs(obs - est4) / float(obs) * 100
                    # MAPE_errors_heu.append(error4)

                    

                    # MAPE baseline error?
            MAPE_avg_errors.append(avg(MAPE_errors))
            MAPE_avg_errors_ext.append(avg(MAPE_errors_ext))
            # MAPE_avg_errors_heu.append(avg(MAPE_errors_heu))
            iteration += 1

        avg_max_ent_errors.append(avg(MAPE_avg_errors))
        avg_ext_errors.append(avg(MAPE_avg_errors_ext))
        # avg_heu_errors.append(avg(MAPE_avg_errors_heu))
        

    plot(range(len(avg_max_ent_errors)), avg_max_ent_errors, color='blue')
    plot(range(len(avg_ext_errors)), avg_ext_errors, color='red')
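# NOTE: a small worked example of the ratio binning used in plot_intervals()
# above (a standalone sketch, independent of any CV output files): a
# pair/triple ratio is rounded to one decimal and mapped to one of the eleven
# bins 0.0, 0.1, ..., 1.0.
pair_triple_ratios = [i / 10. for i in range(11)]
ratio_binned = round(0.37, 1)                       # -> 0.4
bin_index = pair_triple_ratios.index(ratio_binned)  # -> 4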
Beispiel #38
0
def triple_errors(output_folder, triple):
    from parsers import CVOutputParser
    from utils import interpolate, avg, confidence_interval
    import math
    from collections import Counter
    import os

    """ 
    Plot accumulated errors for estimators against pair triple ratios.
    Ratios are binned in the range 0.0 to 1.0.
    """
    if not output_folder[-1] == "/":
        output_folder += "/"

    iteration = -1
    max_ent_errors = []
    ext_errors = []
    max_ent_abs_errors = []
    ext_abs_errors = []
    samples_ignored = 0
    while True:
        iteration += 1
        max_ent_est_file = output_folder + str(iteration) + "_data.tsv"
        ext_est_file = output_folder + str(iteration) + "_data_extrapolation.tsv"
        # heu_est_file = output_folder + str(iteration) + '_data_heurestic.tsv'
        # read baseline also?
        # Read until we do not find an output file
        if not os.path.exists(max_ent_est_file):
            break

        # Read the maxent estimate
        found = False
        for sample_triple, (est, obs, ratio, triangle) in CVOutputParser.read_est_obs_file_disc_version_2(
            max_ent_est_file
        ):
            (s1, s2, s3, s12, s13, s23, s123) = triangle

            if sample_triple == triple:
                # if s123 == 0:
                #     break
                found = True
                max_ent_errors.append(est - obs)
                max_ent_abs_errors.append(abs(obs - est))
                break

        if not found:
            samples_ignored += 1
            continue

        for sample_triple, (est, obs, ratio, triangle) in CVOutputParser.read_est_obs_file_disc_version_2(ext_est_file):
            (s1, s2, s3, s12, s13, s23, s123) = triangle

            if sample_triple == triple:
                ext_errors.append(est - obs)
                ext_abs_errors.append(abs(obs - est))
                break

    # maxent confidence interval
    maxent_ci = confidence_interval(max_ent_errors)
    # extrapolation confidence interval
    ext_ci = confidence_interval(ext_errors)

    print "samples ignored: ", samples_ignored
    print "maxent avg error: ", round(avg(max_ent_errors), 1)
    print "maxent 95% confidence interval: ", (round(maxent_ci[0], 1), round(maxent_ci[1], 2))
    print "extrapolation avg error: ", round(avg(ext_errors), 1)
    print "extrapolation 95% confidence interval: ", (round(ext_ci[0], 1), round(ext_ci[1], 2))

    # round
    max_ent_errors_rounded = [round(x, 1) for x in max_ent_errors]
    ext_errors_rounded = [round(x, 1) for x in ext_errors]

    # plot
    xlabel("Estimate error")
    ylabel("Bucket size")
    # text(0.1, 0.8, 'Maxent')
    # text(0.1, 0.7, 'avg. error: ' + str(avg(max_ent_errors)))
    # text(0.1, 0.6, '95% conf. interval: ' + str(maxent_ci))

    # text(0.5, 0.8, 'Extrapolation')
    # text(0.5, 0.7, 'avg. error: ' + str(avg(ext_errors)))
    # text(0.5, 0.6, '95% conf. interval: ' + str(ext_ci))

    hist([max_ent_errors_rounded, ext_errors_rounded], color=("b", "r"))

    return max_ent_errors, max_ent_abs_errors, ext_errors, ext_abs_errors
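# NOTE: utils.confidence_interval is not shown in this collection. A minimal
# sketch of a 95% confidence interval for the mean under a normal
# approximation (hypothetical; the real helper may use a t-distribution or
# other conventions):
import math

def confidence_interval_sketch(values, z=1.96):
    values = list(values)
    n = len(values)
    mean = sum(values) / float(n)
    # Unbiased sample variance; requires at least two values.
    var = sum((v - mean) ** 2 for v in values) / float(n - 1)
    half_width = z * math.sqrt(var / n)
    return mean - half_width, mean + half_width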
Beispiel #39
0
def min_max_avg(array):
    """
    Return the min, max, and avg of the given array.
    """
    return min(array), max(array), avg(array)

worst = increases[-1]
print_delta('worst:    ', worst)

n_01p = int(round(len(increases) / 100))  # Worst 1 percentile
if n_01p == 0: n_01p = 1
worst_01p = increases[-n_01p]
print_delta('worst  1%:', worst_01p)

n_10p = int(round(len(increases) / 10))  # Worst 10 percentile
if n_10p == 0: n_10p = 1
worst_10p = increases[-n_10p]
print_delta('worst 10%:', worst_10p)

print('increases that stopped early: %i' % stopped_early)

values_increase = []
values_val_loss = []
for node in increases:
    values_increase.append(node.get_val_loss_delta())
    values_val_loss.append(node.val_loss)
avg_increase = avg(values_increase)
avg_val_loss = avg(values_val_loss)
print('avg increase: %f' % avg_increase)
delta_ratio = 100.0 * avg_increase / avg_val_loss  # percentage of avg val loss
print('avg increase percentage: %f' % delta_ratio)

file_increase_deltas = "increase-deltas-%s.data" % args.token
append(file_increase_deltas, "%i %5.1f" % (args.stage, delta_ratio))
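# NOTE: the append() helper used above is not shown. A minimal sketch of what
# the call assumes (hypothetical): append one line of text to a file.
def append_sketch(filename, line):
    with open(filename, 'a') as f:
        f.write(line + '\n')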
Beispiel #41
0
def dist_average(c0, c1):
    return Vector.distance(avg(c0), avg(c1))
Beispiel #42
0
def average_distance(c0, c1):
    return avg(ClustComparers.distances(c0, c1))
Beispiel #43
0
def sum_of_squared_errors(self):
    normsq = lambda v: abs(Vector.norm(v, 1))
    clust_avg = lambda clst: (clst, avg(clst))
    clust_sqerr = lambda (clst, mu): sum(normsq(v - mu) for v in clst)
    return sum(it.imap(clust_sqerr, it.imap(clust_avg, self)))
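# NOTE: the tuple-unpacking lambda above only parses under Python 2. An
# equivalent sketch that also runs under Python 3, assuming the same avg()
# and Vector helpers (a hypothetical rewrite, not the original method):
def sum_of_squared_errors_sketch(clusters):
    total = 0.0
    for clst in clusters:
        mu = avg(clst)  # cluster centroid
        total += sum(abs(Vector.norm(v - mu, 1)) for v in clst)
    return total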
Beispiel #44
0
    def __init__(self, n):  # n is the tessellation number
        self.clear()
        self.n = n

        if n == 1:  #Hexagons
            shape = 'hex2'
            img_n = 0
            l1 = []
            s = 70.0 / 11
            h = s * 1.732 / 2
            dx = s * 1.5
            dy = h
            x0 = s / 2 - .4
            y0 = 0 - .3
            x = x0
            y = y0
            for i in range(4):
                l1.append((x, y))
                x += dx
                y += dy
                dy = -dy
            x = x0
            y = y0 + 2 * h
            for i in range(4):
                l1.append((x, y))
                x += dx
                y += dy
                dy = -dy
            x = x0
            y = y0 + 4 * h
            for i in range(2):
                l1.append((x, y))
                x += 2 * dx
            self.init(l1, img_n, shape)

        elif n == 2:  # Squares
            # square 1
            shape = 'sq1'
            img_n = 0
            l1 = []
            s = 5.33
            d = s * 2
            y = s / 2
            for r in range(2):
                x = s / 2
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += d
            y = s / 2 + s
            for r in range(2):
                x = s / 2 + s
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += d
            self.init(l1, img_n, shape)
            # square 2
            shape = 'sq2'
            img_n = 1
            l1 = []
            y = s / 2
            for r in range(2):
                x = s / 2 + s
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += d
            y = s / 2 + s
            for r in range(2):
                x = s / 2
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += d
            self.init(l1, img_n, shape)

        elif n == 3:  #Triangles
            shape = 'tri1'
            img_n = 0
            l1 = []
            s = 5.33
            d = s * 2
            y0 = s * .866 - 3
            y = y0
            for r in range(2):
                x = s
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += s * 1.732 * 2
            y = y0 + s * 1.732
            for r in range(1):
                x = 0
                for c in range(4):
                    l1.append((x, y))
                    x += d
                y += s * 1.732 * 2
            self.init(l1, img_n, shape)
            shape = 'tri2'
            img_n = 1
            l1 = []
            y = y0
            for r in range(2):
                x = 0
                for c in range(4):
                    l1.append((x, y))
                    x += d
                y += s * 1.732 * 2
            y = y0 + s * 1.732
            for r in range(1):
                x = s
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += s * 1.732 * 2
            self.init(l1, img_n, shape)

        elif n == 4:
            # dodecagons
            shape = 'dodec'
            img_n = 0
            l1 = ((0, 1.5), (16, 1.5), (32, 1.5), (8, 15.3), (24, 15.3))
            self.init(l1, img_n, shape)
            # hexagons
            shape = 'hex'
            img_n = 1
            l1 = ((8, 6), (24, 6), (0, 10.8), (16, 10.8), (32, 10.8),
                  (0, 19.8), (16, 19.8), (32, 19.8))
            self.init(l1, img_n, shape)
            # squares @ 30deg
            shape = 'sq30'
            img_n = 2
            l1 = ((12, 8.4), (28, 8.4), (4, 22.1), (20, 22.1))
            self.init(l1, img_n, shape)
            # squares @ -30deg
            shape = 'sq_30'
            img_n = 3
            l1 = ((4, 8.4), (20, 8.4), (12, 22.1), (28, 22.1))
            self.init(l1, img_n, shape)
            # squares
            shape = 'sq'
            img_n = 4
            l1 = ((8, 1.5), (24, 1.5), (0, 15.3), (16, 15.3), (32, 15.3))
            self.init(l1, img_n, shape)

        elif n == 5:  # Alhambra
            x0 = 2.9
            y0 = 2.4
            dx = 6.55
            dy = 6.55
            # red
            shape = 'red'
            img_n = 0
            l1 = []
            y = y0
            for r in range(2):
                x = x0
                for c in range(3):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            y = y0 + dy
            for r in range(2):
                x = x0 + dx
                for c in range(2):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            self.init(l1, img_n, shape)
            # yellow
            shape = 'yellow'
            img_n = 1
            l1 = []
            y = y0
            for r in range(2):
                x = x0 + dx
                for c in range(2):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            y = y0 + dy
            for r in range(2):
                x = x0
                for c in range(3):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            self.init(l1, img_n, shape)
            # blue
            shape = 'blue'
            img_n = 2
            l1 = []
            y = y0 + dy / 2
            for r in range(2):
                x = x0 + dx / 2
                for c in range(3):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            y = y0 - dy / 2
            for r in range(2):
                x = x0 + dx + dx / 2
                for c in range(2):
                    if r == 1 and c == 0: l1.append((x - 2 * dx, y), )
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            self.init(l1, img_n, shape)
            # green
            shape = 'green'
            img_n = 3
            l1 = []
            y = y0 + dy / 2
            for r in range(2):
                x = x0 - dx / 2
                for c in range(3):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            y = y0 - dy / 2
            for r in range(2):
                x = x0 + dx - dx / 2
                for c in range(2):
                    if r == 1 and c == 1: l1.append((x + 2 * dx, y), )
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            self.init(l1, img_n, shape)

        elif n == 6:  # Hexagons & Triangles
            shape = 'hex3'
            img_n = 0
            l1 = []
            s = 5.333
            d = s * 2
            x0 = 0
            y0 = 0
            h = s * 1.732 / 2
            y = y0
            for r in range(2):
                x = x0 + s
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += h * 4
            x = x0 + 0
            y = y0 + h * 2
            for c in range(4):
                l1.append((x, y))
                x += d
            self.init(l1, img_n, shape)
            shape = 'tri3'
            img_n = 1
            l1 = []
            y = y0 + h / 2
            for r in range(2):
                x = x0
                for c in range(4):
                    l1.append((x, y))
                    x += d
                y += h * 4
            x = x0 + s
            y = y0 + h * 2.5
            for c in range(3):
                l1.append((x, y))
                x += d
            self.init(l1, img_n, shape)
            shape = 'tri4'
            img_n = 2
            l1 = []
            x = x0 + s
            y = y0 + h * 1.5
            for c in range(3):
                l1.append((x, y))
                x += d
            x = x0
            y = y0 + h * 3.5
            for c in range(4):
                l1.append((x, y))
                x += d
            self.init(l1, img_n, shape)

        elif n == 7:  # Hexagons & Triangles & Squares
            shape = 'hex6'
            img_n = 0
            l1 = []
            s = 4.96
            y0 = .55
            h = s * 1.732 / 2
            x0 = 16 - s - 2 * h
            y = y0
            dx = s + 2 * h
            dy = 3 * s + 2 * h
            for r in range(2):
                x = x0
                for c in range(3):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = x0 + s / 2 + h
            y = y0 + 1.5 * s + h
            l1 += ((x, y), (x + dx, y))
            self.init(l1, img_n, shape)
            c0 = l1[0]
            c3 = l1[3]
            c6 = l1[6]
            shape = 'sq6'
            img_n = 1
            l1 = []
            x = x0 + h + s / 2
            y = y0
            l1 = [(x, y), (x + dx, y)]
            x = x0
            y = y0 + 1.5 * s + h
            l1 += ((x, y), (x + dx, y), (x + 2 * dx, y))
            self.init(l1, img_n, shape)
            shape = 'sq7'
            img_n = 2
            (x, y1) = utils.avg(c0, c6)
            l1 = [(x, y1), (x + dx, y1), (x + 2 * dx, y1)]
            x -= dx / 2
            (t, y2) = utils.avg(c6, c3)
            l1 += [(x, y2), (x + dx, y2), (x + 2 * dx, y2)]
            self.init(l1, img_n, shape)
            shape = 'sq8'
            img_n = 3
            l1 = [(x, y1), (x + dx, y1), (x + 2 * dx, y1)]
            x += dx / 2
            l1 += [(x, y2), (x + dx, y2), (x + 2 * dx, y2)]
            self.init(l1, img_n, shape)
            shape = 'tri6'
            img_n = 4
            dy = y2 - y1
            x = x0 + h + s / 2
            y1 = y0 + s / 2 + h / 2
            l1 = [(x, y1), (x + dx, y1)]
            x -= dx / 2
            y = y1 + dy
            l1 += [(x, y), (x + dx, y), (x + 2 * dx, y)]
            self.init(l1, img_n, shape)
            shape = 'tri7'
            img_n = 5
            x = x0 + h + s / 2 - dx / 2
            y1 = y0 + s + h / 2
            l1 = [(x, y1), (x + dx, y1), (x + 2 * dx, y1)]
            x += dx / 2
            y = y1 + dy
            l1 += [(x, y), (x + dx, y)]
            self.init(l1, img_n, shape)

        elif n == 8:  # Octagons & Squares
            shape = 'oct'
            img_n = 0
            l1 = []
            y = 4
            for r in range(3):
                x = 4
                for c in range(4):
                    l1.append((x, y))
                    x += 8
                y += 8
            self.init(l1, img_n, shape)
            shape = 'sq9'
            img_n = 1
            l1 = []
            y = 0
            for r in range(3):
                x = 0
                for c in range(5):
                    l1.append((x, y))
                    x += 8
                y += 8
            self.init(l1, img_n, shape)

        elif n == 9:  # Dodecagons & Triangles
            shape = 'dodec9'
            img_n = 0
            l1 = []
            y = 4.68
            dx = 22.48
            dy = 12.88
            for r in range(2):
                x = 4.76
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = 16
            y = 4.68 - dy / 2
            for r in range(3):
                l1.append((x, y))
                y += dy
            self.init(l1, img_n, shape)

        elif n == 10:  # Squares & Triangles
            shape = 'tri10'
            img_n = 0
            l1 = []
            s = 5.856
            h = s * 1.732 / 2
            dx = 16
            dy = dx
            y = s / 2
            for r in range(2):
                x = h / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = h / 2 + dx / 2
            y = s / 2 + dy / 2
            for c in range(2):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)
            shape = 'sq10'
            img_n = 1
            l1 = []
            y = (1.5 * s + h) / 2
            for r in range(2):
                x = (.5 * s + h) / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            y = (1.5 * s + h) / 2 - dy / 2
            for r in range(2):
                x = (.5 * s + h) / 2 + dx / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            self.init(l1, img_n, shape)
            shape = 'sq11'
            img_n = 2
            l1 = []
            y = (1.5 * s + h) / 2 - dy / 2
            for r in range(2):
                x = (.5 * s + h) / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            y = (1.5 * s + h) / 2
            for r in range(2):
                x = (.5 * s + h) / 2 + dx / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            self.init(l1, img_n, shape)
            shape = 'tri11'
            img_n = 3
            l1 = []
            y = s / 2
            for r in range(2):
                x = 16 - h / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = s / 2 + h / 2
            y = s / 2 + dy / 2
            for c in range(2):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)
            shape = 'tri12'
            img_n = 4
            l1 = []
            y = s + h / 2 - dy / 2
            for r in range(2):
                x = h + s / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = 0
            y = s + h / 2
            for c in range(3):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)
            shape = 'tri13'
            img_n = 5
            l1 = []
            y = s / 2 + h / 2
            for r in range(2):
                x = h + s / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = 0
            y = s + h + h / 2
            for c in range(3):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)

        elif n == 11:  # Squares & Triangles
            shape = 'sq14'
            img_n = 0
            l1 = []
            s = 8
            h = s * 1.732 / 2
            x = s / 2
            y = s / 2
            dx = s
            dy = s + h
            for c in range(4):
                l1.append((x, y))
                x += dx
            x = 0
            y += dy
            for c in range(5):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)
            shape = 'tri15'
            img_n = 1
            l1 = []
            x = s / 2
            y = s + h / 2
            dx = s
            for c in range(4):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)
            shape = 'tri14'
            img_n = 2
            l1 = []
            x = 0
            y = s + h / 2
            dx = s
            for c in range(5):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)

        elif n == 12:  # Hexagons & Triangles
            s = 5.333
            h = s * 1.732 / 2
            shape = 'hex16'
            img_n = 0
            l1 = []
            l1.append((s / 2, h))
            l1.append((3 * s, 2 * h))
            l1.append((5 * s, 0))
            l1.append((s, 4 * h))
            l1.append((3.5 * s, 5 * h))
            l1.append((5.5 * s, 3 * h))
            self.init(l1, img_n, shape)
            shape = 'tri16'
            img_n = 1
            l1 = []
            dx = s
            x = 1.5 * s
            y = .5 * h
            for c in range(3):
                l1.append((x, y))
                x += dx
            x = 4 * s
            y = 1.5 * h
            for c in range(3):
                l1.append((x, y))
                x += dx
            l1.append((2 * s, y))
            x = s / 2
            y = 2.5 * h
            for c in range(2):
                l1.append((x, y))
                x += dx
            l1.append((4.5 * s, y))
            x = 2 * s
            y = 3.5 * h
            l1.append((0, y))
            for c in range(3):
                l1.append((x, y))
                x += dx
            x = 4.5 * s
            y = 4.5 * h
            l1.append((2.5 * s, y))
            for c in range(2):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)

            shape = 'tri17'
            img_n = 2
            l1 = []
            x = 2 * s
            y = .5 * h
            for c in range(3):
                l1.append((x, y))
                x += dx
            l1.append((32, y))
            x = 4.5 * s
            y = 1.5 * h
            l1.append((1.5 * s, y))
            for c in range(2):
                l1.append((x, y))
                x += dx
            x = 0
            y = 2.5 * h
            for c in range(3):
                l1.append((x, y))
                x += dx
            l1.append((4 * s, y))
            x = 2.5 * s
            y = 3.5 * h
            for c in range(3):
                l1.append((x, y))
                x += dx
            y = 4.5 * h
            l1.append((0, y))
            l1.append((2 * s, y))
            l1.append((5 * s, y))
            l1.append((32, y))
            self.init(l1, img_n, shape)
        self.setup()
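# NOTE: the literal 1.732 used throughout the tessellation layouts above
# approximates sqrt(3); s * 1.732 / 2 is the height of an equilateral triangle
# of side s, which sets the row spacing. A sketch with the exact constant:
import math
s = 5.333
h = s * math.sqrt(3) / 2  # same role as the "h = s * 1.732 / 2" lines above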
Beispiel #45
0
    def evaluate(self, learner,
            partition='test', debug_idxs=None, skip_idxs=(), decoder='ilp',
            n_eval=(1,2,3,4), streaming=True, overwritten_params=(),
            eval_path=None, output_path=None, lm_proxy=None, **kwargs):
        """Run the transduction model on designated test instances and report
        performance metrics.
        """
        # When evaluating multiple iterations of the same model over a fixed
        # partition, decoding should ensure that initialization isn't
        # unnecessarily repeated.
        if learner is not None:
            eval_instances = self.decode_instances(learner,
                                                   partition=partition,
                                                   debug_idxs=debug_idxs,
                                                   skip_idxs=skip_idxs,
                                                   decoder=decoder,
                                                   streaming=streaming,
                                                   overwritten_params=\
                                                           overwritten_params,
                                                   **kwargs)
            system_name = learner.name
        else:
            eval_instances = self.get_instances(partition=partition,
                                                debug_idxs=debug_idxs,
                                                skip_idxs=skip_idxs)
            system_name = 'baseline'
        num_instances = len(eval_instances)

        # Record overwritten parameter values in the filenames
        overwriting_str = None
        if len(overwritten_params) > 0:
            overwriting_str = '_OW-'
            i = 0
            for param_name, value in overwritten_params.iteritems():
                if isinstance(value, list) or isinstance(value, tuple):
                    overwriting_str += '+'.join(str(v) for v in sorted(value))
                else:
                    overwriting_str += str(value)
                i += 1
                if i < len(overwritten_params):
                    overwriting_str += '-'

        if output_path is not None:
            output_filename = ''.join((output_path, '/',
                    '_'.join((partition, 'under', system_name)),
                    overwriting_str if overwriting_str is not None else '',
                    '_', decoder, '.out'))
            outf = open(output_filename, 'wb')

        # Determine the evaluations to run by looking at a representative
        # instance
        i = 0
        while i < len(eval_instances) and \
                not hasattr(eval_instances[i], 'output_sent'):
            i += 1
        if i == len(eval_instances):
            print "WARNING: all instances failed; skipping evaluation"
            sys.exit()
        some_instance = eval_instances[i]
        has_labels = hasattr(some_instance, 'label_sentences')
        has_rasp = hasattr(some_instance.gold_sentences[0], 'relgraph')
        has_outtrees = hasattr(some_instance.output_sent, 'outtree')
        has_outframes = hasattr(some_instance.output_sent, 'outframes')

        # NOTE: skip_failed must remain False for test evaluation; it may be
        # set to True only temporarily when debugging failed instances.
        skip_failed = False

        # Initialize the evaluations
        eval_obj = evaluation.Evaluation(title='TRANSDUCTION_EVAL')
        output_sents = []
        with timer.AvgTimer(num_instances):
            for i, instance in enumerate(eval_instances):
                sys.stdout.write("Evaluating " + str(num_instances) +
                        (" " + partition if partition is not None else "") +
                        " instances: " + str(i+1) + '\r')

                # Duration and failure status
                eval_obj.include(
                        system=system_name,
                        corpus='other',
                        decode_time=instance.decode_times[-1],
                        solution_time=instance.solution_times[-1] \
                                if len(instance.solution_times) > 0 else 0,
                        inputs=len(instance.input_sents),
                        _failed=int(not hasattr(instance, 'output_sent')),
                        )

                if skip_failed and not hasattr(instance, 'output_sent'):
                    print "WARNING: Skipping failed instance", instance.idx
                    continue

                # POS tag recall
                for use_labels in set([False]) | set([has_labels]):
                    for prefix in ('NN', 'VB', 'JJ', 'RB'):
                        p, r, f = instance.score_content_words(
                                use_labels=use_labels, prefixes=(prefix,))
                        eval_obj.add_metrics(
                                precision=p,
                                recall=r,
                                system=system_name,
                                corpus=('LBLs ' + prefix) if use_labels \
                                        else ('GOLD ' + prefix),
                                )

                try:
                    if lm_proxy is not None:
                        output_tokens = instance.output_sent.tokens \
                                if hasattr(instance, 'output_sent') else []
                        eval_obj.include(
                                system=system_name,
                                corpus='other',
                                lm=lm_proxy.score_sent(output_tokens)
                                )
                except jsonrpc.RPCTransportError:
                    print "ERROR: JSON-RPC hiccups; skipping LM scoring"
                    pass

                if decoder.startswith('dp+'):
                    # Record convergence of dual decomposition or
                    # bisection. Will be 0 if neither are used.
                    eval_obj.include(
                            system=system_name,
                            corpus='other',
                            convergence_=int(instance.converged),
                            iterations=instance.num_iterations,
                            )

                if len(instance.sentences) == 1:
                    # Paraphrasing or compression-specific metrics
                    eval_obj.include(
                            system=system_name,
                            corpus='STATS gold',
                            comp_=instance.get_gold_compression_rate(),
                            length=instance.avg_gold_len,
                            proj_=avg(int(gold_sent.dparse.is_projective())
                                for gold_sent in instance.gold_sentences),
                            overlap_=avg(instance.get_overlap(gold_sent)
                                for gold_sent in instance.gold_sentences),
                            )
                    eval_obj.include(
                            system=system_name,
                            corpus='STATS input',
                            comp_=1.0,
                            length=instance.avg_len,
                            proj_=int(
                                instance.sentences[0].dparse.is_projective()),
                            overlap_=instance.get_overlap(
                                instance.sentences[0])
                            )
                    eval_obj.include(
                            system=system_name,
                            corpus='STATS output',
                            comp_=instance.get_compression_rate(),
                            length=len(instance.output_sent.tokens)
                                    if hasattr(instance, 'output_sent') else 0,
                            )
                    if hasattr(instance, 'output_sent') and has_outtrees:
                        eval_obj.include(
                                system=system_name,
                                corpus='STATS output',
                                proj_=int(instance.output_sent.\
                                          outtree.is_projective())
                                      if hasattr(instance.output_sent.outtree,\
                                                 'is_projective')
                                      else 0,
                                overlap_=instance.get_overlap(
                                    instance.output_sent,
                                    parse_type='outtree')
                                )

#                    print "INSTANCE ", instance.idx
#                    crossing_edges = \
#                        instance.output_sent.outtree.get_crossing_edges()
#                    print "\n\nINPUT:",
#                    self.dump_parse(instance.sentences[0])
#
#                    for gs, gold_sent in enumerate(
#                            instance.gold_sentences):
#                        # get output indices for gold
#                        gold_idxs = []
#                        i = 0
#                        for token in gold_sent.tokens:
#                            while instance.sentences[0].tokens[i] != token:
#                                i += 1
#                            gold_idxs.append((0,i))
#
#                        print "\nGOLD:", gs,
#                        self.dump_parse(gold_sent,
#                            idx_mapper=gold_idxs)
#
#                    print "\n\nOUTPUT:",
#                    self.dump_parse(instance.output_sent,
#                            parse_type='outtree',
#                            crossing_edges=crossing_edges,
#                            idx_mapper=instance.output_idxs)

                # n-gram precision and recall
                for use_labels in set([False]) | set([has_labels]):
                    for n in n_eval:
                        p, r, f = instance.score_ngrams(n=n,
                                use_labels=use_labels)
                        eval_obj.add_metrics(
                                precision=p,
                                recall=r,
                                system=system_name,
                                corpus='LBLs n='+str(n) if use_labels else
                                       'GOLD n='+str(n),
                                )
                if hasattr(instance, 'output_sent') and has_outframes:
                    # Precision and recall for frames
                    p, r, f = instance.score_frames(fes=False,
                                                    frames_type='outframes',
                                                    use_labels=use_labels)
                    eval_obj.add_metrics(
                            precision=p,
                            recall=r,
                            system=system_name,
                            corpus="GOLD frames",
                            )

                    # Precision and recall for frame elements
                    p, r, f = instance.score_frames(fes=True,
                                                    frames_type='outframes',
                                                    use_labels=use_labels)
                    eval_obj.add_metrics(
                            precision=p,
                            recall=r,
                            system=system_name,
                            corpus="GOLD fes",
                            )

                # Parse output sentences for syntactic evaluation. The
                # 100 token limit is intended for the Stanford parser.
                if hasattr(instance, 'output_sent') and \
                        len(instance.output_sent.tokens) <= 100:
                    output_sents.append(instance.output_sent)

                # Write the output to a file
                if output_path is not None:
                    outf.write(instance.get_display_string())
#            print
            if output_path is not None:
                outf.close()

            # Parse-based evaluations
            try:
                parse_types = ['dparse']
                if has_outtrees:
                    parse_types.append('outtree')

                # Get annotations. Only run RASP if the inputs have RASP
                # annotations since it's slow
                annotations.annotate(output_sents, 'Stanford')
                if has_rasp:
                    annotations.annotate(output_sents, 'Rasp')
                    parse_types.append('relgraph')

                # Add dependency results to evaluations
                for i, instance in enumerate(eval_instances):
                    if skip_failed and not hasattr(instance, 'output_sent'):
                        print "WARNING: Skipping failed instance",
                        print instance.idx, "again"
                        continue

                    for parse_type in parse_types:
                        for use_labels in set([False]) | set([has_labels]):
                            name = ('LBLs ' if use_labels else 'GOLD ') + \
                                parse_type
                            p, r, f = instance.score_dependencies(
                                    parse_type=parse_type,
                                    use_labels=use_labels)
                            eval_obj.add_metrics(
                                    precision=p,
                                    recall=r,
                                    system=system_name,
                                    corpus=name,
                                    _failed=int(not instance.has_output_parses(
                                            parse_type=parse_type)))
            except OSError:
                print "Skipping parser evaluations"

        print eval_obj.title
        print eval_obj.table(skip_single_keys=True)
        if eval_path is not None and debug_idxs is None:
            eval_filename = ''.join((eval_path, '/',
                    '_'.join((partition, 'under', system_name)),
                    overwriting_str if overwriting_str is not None else '',
                    '_', decoder,
                    '.eval'))
            eval_obj.save(eval_filename, append=False)
Beispiel #46
0
def calc_avg_errors(output_folder):

    from parsers import CVOutputParser
    from utils import interpolate, avg
    import math
    from collections import Counter
    import os
    """ 
    Average error calculation on CV output.
    """
    if not output_folder[-1] == '/':
        output_folder += '/'
    
    # better_than_baseline_file = open('better_than_base_line.tsv', 'w')
    # better_than_baseline_file.write('est\tobs\tn1\tn2\tn3\tpair_trip_ratio\ts1\ts2\ts3\ts12\ts13\ts23\ts123\n')

    # small_error_file = open('small_error.tsv', 'w')
    # small_error_file.write('est\tobs\tn1\tn2\tn3\tpair_trip_ratio\ts1\ts2\ts3\ts12\ts13\ts23\ts123\n')    
    baseline = 88.5
    iteration = 0
    points_evaluated = 0
    over_estimates = 0
    all_sample_errors = []
    while True:
        tsv_file = output_folder + str(iteration) + '_data_zero_trips.tsv'

        if not os.path.exists(tsv_file):
            break

        sample_errors = []
        for (n1, n2, n3), (est, obs, ratio, triangle) in CVOutputParser.read_est_obs_file_disc_version_2(tsv_file):

            s1, s2, s3, s12, s13, s23, s123 = triangle

            # if int(obs) < 200 or s123 == 0:
            #     continue

            # Heuristic for extrapolation, assuming a sample of 200000 items
            # est = min(s12, s13, s23) / 200000. * (21006480-200000)

            points_evaluated += 1
            if est > obs:
                over_estimates += 1

            # if obs > baseline:
            #     if abs(est-obs) < abs(est-baseline):
            #         better_than_baseline_file.write(str(est) + '\t' + str(obs) + '\t' + str(n1) + '\t' + str(n2) + '\t' + str(n3) + '\t' + str(ratio) + '\t' + str(s1) + '\t' + str(s2) + '\t' + str(s3) + '\t' + str(s12) + '\t' + str(s13) + '\t' + str(s23) + '\t' + str(s123) + '\n')

            error = abs(est-obs) / math.sqrt(obs)
            # if error < 3:
            #     small_error_file.write(str(est) + '\t' + str(obs) + '\t' + str(n1) + '\t' + str(n2) + '\t' + str(n3) + '\t' + str(ratio) + '\t' + str(s1) + '\t' + str(s2) + '\t' + str(s3) + '\t' + str(s12) + '\t' + str(s13) + '\t' + str(s23) + '\t' + str(s123) + '\n')
            sample_errors.append(error)
        all_sample_errors.append(avg(sample_errors))
        iteration += 1

    # better_than_baseline_file.close()
    # small_error_file.close()

    avg_error = avg(all_sample_errors)
    print 'avg_error ', avg_error
    print 'points evaluated', points_evaluated
    print 'over estimates: ', over_estimates
    return avg_error, all_sample_errors
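# NOTE: the per-triple error in calc_avg_errors() above is |est - obs| /
# sqrt(obs): an absolute error scaled by the square root of the observed
# count (the Poisson standard deviation of a count with mean obs), not a
# percentage error. A minimal sketch of that metric:
import math

def scaled_error(est, obs):
    return abs(est - obs) / math.sqrt(obs)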