def plot_intervals_restricted_cv(output_folders):
    # Intervals are indexed by position; the actual interval boundaries can
    # currently only be seen in the printout from the preprocessor.
    # Run this on a CV that has been run on restricted intervals.
    intervals = range(len(output_folders))
    AVG_avg_errors = []
    AVG_avg_errors_baseline = []
    AVG_avg_errors_ext = []
    #AVG_avg_errors_heu = []
    #AVG_avg_errors_ind = []

    for index, folder in enumerate(output_folders):

        # save average of averages with the given interval
        avg_errors = pickle.load(open(folder + 'avg_errors.pickle', 'rb'))
        avg_errors_baseline = pickle.load(open(folder + 'avg_errors_baseline.pickle', 'rb'))
        avg_errors_ext = pickle.load(open(folder + 'avg_errors_ext.pickle', 'rb'))
        #avg_errors_heu = pickle.load(open(folder + 'avg_errors_heu.pickle', 'rb'))
        #avg_errors_ind = pickle.load(open(folder + 'avg_errors_ind.pickle', 'rb'))

        AVG_avg_errors.append(avg(avg_errors))
        AVG_avg_errors_baseline.append(avg(avg_errors_baseline))
        AVG_avg_errors_ext.append(avg(avg_errors_ext))
        #AVG_avg_errors_heu.append(avg(avg_errors_heu))
        #AVG_avg_errors_ind.append(avg(avg_errors_ind))

    # plot
    plot(intervals, AVG_avg_errors, color='blue')
    plot(intervals, AVG_avg_errors_ext, color='red')
    #plot(intervals, AVG_avg_errors_ind, color='green')
    #plot(intervals, AVG_avg_errors_heu, color='yellow')
    plot(intervals, AVG_avg_errors_baseline, color='purple')
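# Note: every snippet in this collection relies on an avg helper (and several
# on a stddev helper) from a project-specific utils module that is not shown.
# A minimal sketch assuming plain arithmetic-mean semantics (the real helpers
# may treat weights, empty inputs, or None differently):
def avg(values, weights=None):
    # Arithmetic (optionally weighted) mean; returns 0 for an empty sequence.
    values = list(values)
    if not values:
        return 0
    if weights is None:
        return sum(values) / float(len(values))
    return sum(v * w for v, w in zip(values, weights)) / float(sum(weights))


def stddev(values):
    # Population standard deviation around the arithmetic mean.
    values = list(values)
    if not values:
        return 0
    mean = avg(values)
    return (sum((v - mean) ** 2 for v in values) / float(len(values))) ** 0.5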
Example #2
    def check_frame_coverage(self,
                             partition='train',
                             debug_idxs=None,
                             skip_idxs=()):
        """Record the fraction of potential frames and FEs that are present
        in gold compressions.
        """
        tgt_instances = self.get_instances(partition=partition,
                                           debug_idxs=debug_idxs,
                                           skip_idxs=skip_idxs)

        print "items\tavg_overlap_rate\tavg_reachability\tnum_frameless"
        for item in ['frame', 'fe']:
            instance_overlaps = []
            instance_reachability = []
            num_frameless = 0

            for instance in tgt_instances:
                sent_frame_tuples = getattr(instance, 'get_' + item +
                                            '_tuples')(instance.sentences[0])
                gold_overlaps = []
                for gold_sent in instance.gold_sentences:
                    gold_frame_tuples = getattr(instance, 'get_' + item +
                                                '_tuples')(gold_sent)
                    if len(gold_frame_tuples) == 0:
                        gold_overlaps.append(1)  # always reachable
                        num_frameless += 1
                        break
                    gold_frame_tuple_set = set(gold_frame_tuples)
                    overlap = gold_frame_tuple_set.intersection(
                        sent_frame_tuples)
                    gold_overlaps.append(len(overlap) / \
                                            len(gold_frame_tuple_set))

                instance_overlaps.append(avg(gold_overlaps))
                instance_reachability.append(int(min(gold_overlaps) == 1))

            print item, '\t', avg(instance_overlaps),
            print '\t\t', avg(instance_reachability),
            print '\t\t', num_frameless

        # We also need to check which frames are present and how many FEs
        # they have
        num_fes = defaultdict(int)
        in_tgts = 0
        for instance in tgt_instances:
            for sentence in instance.gold_sentences:  # + instance.input_sents:
                for frame in sentence.frames.nodes:
                    key = sum(
                        int(hasattr(edge, 'fe'))
                        for edge in frame.outgoing_edges.itervalues())
                    num_fes[key] += 1
                    in_tgts += sum(
                        int(hasattr(edge, 'target') and hasattr(edge, 'fe'))
                        for edge in frame.outgoing_edges.itervalues())

        print "Histogram of FEs per frame:", dict(num_fes)
        print "Number of FEs which are also targets:", in_tgts
Example #3
    def check_frame_coverage(self, partition='train', debug_idxs=None,
            skip_idxs=()):
        """Record the fraction of potential frames and FEs that are present
        in gold compressions.
        """
        tgt_instances = self.get_instances(partition=partition,
                                           debug_idxs=debug_idxs,
                                           skip_idxs=skip_idxs)

        print "items\tavg_overlap_rate\tavg_reachability\tnum_frameless"
        for item in ['frame', 'fe']:
            instance_overlaps = []
            instance_reachability = []
            num_frameless = 0

            for instance in tgt_instances:
                sent_frame_tuples = getattr(instance,
                        'get_' + item + '_tuples')(instance.sentences[0])
                gold_overlaps = []
                for gold_sent in instance.gold_sentences:
                    gold_frame_tuples = getattr(instance,
                            'get_' + item + '_tuples')(gold_sent)
                    if len(gold_frame_tuples) == 0:
                        gold_overlaps.append(1)     # always reachable
                        num_frameless += 1
                        break
                    gold_frame_tuple_set = set(gold_frame_tuples)
                    overlap = gold_frame_tuple_set.intersection(
                                sent_frame_tuples)
                    gold_overlaps.append(len(overlap) / \
                                            len(gold_frame_tuple_set))

                instance_overlaps.append(avg(gold_overlaps))
                instance_reachability.append(int(min(gold_overlaps) == 1))

            print item, '\t', avg(instance_overlaps),
            print '\t\t', avg(instance_reachability),
            print '\t\t', num_frameless

        # We also need to check which frames are present and how many FEs
        # they have
        num_fes = defaultdict(int)
        in_tgts = 0
        for instance in tgt_instances:
            for sentence in instance.gold_sentences: # + instance.input_sents:
                for frame in sentence.frames.nodes:
                    key = sum(int(hasattr(edge, 'fe'))
                              for edge in frame.outgoing_edges.itervalues())
                    num_fes[key] += 1
                    in_tgts += sum(int(hasattr(edge, 'target') and
                                       hasattr(edge, 'fe'))
                              for edge in frame.outgoing_edges.itervalues())

        print "Histogram of FEs per frame:", dict(num_fes)
        print "Number of FEs which are also targets:", in_tgts
Example #4
 def test_predict(self):
     dimension = 10
     matrix = [[randint(1, 10) for _i in range(0, dimension)]
               for _c in range(0, dimension)]
     cf = MFExplicitPrepSGD(matrix, lf=4)
     for user_id in range(dimension):
         with self.subTest(i=user_id):
             avg_user = avg(matrix[user_id])
             for item_id in range(len(matrix[user_id])):
                 avg_item = avg(cf.matrix.col(item_id))
                 with self.subTest(i=item_id):
                     prep = cf.predict_prep(user_id, item_id)
                     real = prep + 0.5 * (avg_user + avg_item)
                     self.assertEqual(cf.predict(user_id, item_id), real)
Example #5
    def squeeze_shallow_cache_to_avg(self):
        with self.lock:
            if len(self.shallow_cache) > 0:
                out_temp_avg = utils.avg(
                    [x.outdoor_data[0] for x in self.shallow_cache])
                out_humi_avg = utils.avg(
                    [x.outdoor_data[1] for x in self.shallow_cache])
                out_pm10_avg = utils.avg(
                    [x.outdoor_data[2] for x in self.shallow_cache])
                out_pm25_avg = utils.avg(
                    [x.outdoor_data[3] for x in self.shallow_cache])
                in_temp_avg = utils.avg(
                    [x.indoor_data[0] for x in self.shallow_cache])
                in_humi_avg = utils.avg(
                    [x.indoor_data[1] for x in self.shallow_cache])
                in_pm10_avg = utils.avg(
                    [x.indoor_data[2] for x in self.shallow_cache])
                in_pm25_avg = utils.avg(
                    [x.indoor_data[3] for x in self.shallow_cache])

                avg = HumidexData(
                    (in_temp_avg, in_humi_avg, in_pm10_avg, in_pm25_avg),
                    (out_temp_avg, out_humi_avg, out_pm10_avg, out_pm25_avg))
                avg.timestamp = self.shallow_cache[-1].timestamp
                self.avg_cache.append(avg)
                self.shallow_cache = []
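    # The eight per-field averages above could also be computed in a loop; a
    # sketch assuming indoor_data and outdoor_data keep the same 4-tuple layout
    # (temperature, humidity, pm10, pm25):
    # in_avgs = tuple(utils.avg([x.indoor_data[i] for x in self.shallow_cache])
    #                 for i in range(4))
    # out_avgs = tuple(utils.avg([x.outdoor_data[i] for x in self.shallow_cache])
    #                  for i in range(4))
    # avg = HumidexData(in_avgs, out_avgs)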
Example #6
def summarize(sensor, timeframe, start, end):
    # prepare the database schema to use
    if timeframe == "hour":
        key_to_read = sensor["db_sensor"]
        key_to_write = sensor["db_sensor"] + ":hour"
    elif timeframe == "day":
        key_to_read = sensor["db_sensor"] + ":hour:avg"
        key_to_write = sensor["db_sensor"] + ":day"
    # retrieve from the database the data based on the given timeframe
    data = db.rangebyscore(key_to_read, start, end, withscores=True)
    # split between values and timestamps
    values = []
    timestamps = []
    for i in range(0, len(data)):
        timestamps.append(data[i][0])
        values.append(data[i][1])
    # calculate the derived values
    timestamp = start
    min = avg = max = rate = sum = count = count_unique = "-"
    if "avg" in sensor["summarize"] and sensor["summarize"]["avg"]:
        # calculate avg
        avg = utils.avg(values)
        db.deletebyscore(key_to_write + ":avg", start, end)
        db.set(key_to_write + ":avg", avg, timestamp)
    if "min_max" in sensor["summarize"] and sensor["summarize"]["min_max"]:
        # calculate min
        min = utils.min(values)
        db.deletebyscore(key_to_write + ":min", start, end)
        db.set(key_to_write + ":min", min, timestamp)
        # calculate max
        max = utils.max(values)
        db.deletebyscore(key_to_write + ":max", start, end)
        db.set(key_to_write + ":max", max, timestamp)
    if "rate" in sensor["summarize"] and sensor["summarize"]["rate"]:
        # calculate the rate of change
        rate = utils.velocity(timestamps, values)
        db.deletebyscore(key_to_write + ":rate", start, end)
        db.set(key_to_write + ":rate", rate, timestamp)
    if "sum" in sensor["summarize"] and sensor["summarize"]["sum"]:
        # calculate the sum
        sum = utils.sum(values)
        db.deletebyscore(key_to_write + ":sum", start, end)
        db.set(key_to_write + ":sum", sum, timestamp)
    if "count" in sensor["summarize"] and sensor["summarize"]["count"]:
        # count the values
        count = utils.count(values)
        db.deletebyscore(key_to_write + ":count", start, end)
        db.set(key_to_write + ":count", count, timestamp)
    if "count_unique" in sensor["summarize"] and sensor["summarize"][
            "count_unique"]:
        # count the unique values
        count_unique = utils.count_unique(values)
        db.deletebyscore(key_to_write + ":count_unique", start, end)
        db.set(key_to_write + ":count_unique", count_unique, timestamp)
    log.debug("[" + sensor["module_id"] + "][" + sensor["group_id"] + "][" +
              sensor["sensor_id"] + "] (" + utils.timestamp2date(timestamp) +
              ") updating summary of the " + timeframe +
              " (min,avg,max,rate,sum,count,count_unique): (" + str(min) +
              "," + str(avg) + "," + str(max) + "," + str(rate) + "," +
              str(sum) + "," + str(count) + "," + str(count_unique) + ")")
Example #7
def get_research_methods(triples):
    """
    Get chunks from IOB tags of research methods
    Input: list of triple [word, tag, score] of the paper got from tagger
    Ouput: list of research methods with scores
    """
    tags = ['O', 'B-RS', 'I-RS']
    top = 'O'
    stack_rs = []
    stack_sc = []
    dict_research_method = {}

    # stack to get the research methods from paper
    for i in range(len(triples)):
        if triples[i][1] == 'B-RS':
            if top == 'O':
                stack_rs.append(triples[i][0])
                stack_sc.append(
                    softmax(triples[i][2])[tags.index(triples[i][1])])
                top = 'B-RS'
            else:
                research_method = ' '.join(stack_rs)
                dict_research_method[research_method] = avg(stack_sc)
                stack_rs = [triples[i][0]]
                stack_sc = [triples[i][2]]
                top = 'B-RS'

        elif triples[i][1] == 'I-RS':
            if top == 'O':
                continue
            else:
                stack_rs.append(triples[i][0])
                stack_sc.append(
                    softmax(triples[i][2])[tags.index(triples[i][1])])
                top = 'I-RS'

        else:
            if top == 'O':
                continue
            else:
                research_method = ' '.join(stack_rs)
                dict_research_method[research_method] = avg(stack_sc)
                stack_rs = []
                stack_sc = []
                top = 'O'

    return dict_research_method
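# Hypothetical usage sketch for get_research_methods; the tagger output and the
# softmax helper come from elsewhere in this project, so the values below are
# only illustrative:
# triples = [['linear', 'B-RS', scores1], ['regression', 'I-RS', scores2],
#            ['is', 'O', scores3]]
# get_research_methods(triples)
# -> {'linear regression': <average of the two softmaxed B-RS/I-RS scores>}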
Example #8
    def print_cluster_separation(self):
        print "CLUSTER SEPERATION"
        print
        print "Comparing each Cluster to it's most similar other clusters"

        if len(self.clusters) < 2:
            print "There are less than two clusters"
            return

        cluster_sim_mat = self.confirm.get_cluster_sim_mat()
        for row in cluster_sim_mat:
            row.sort(reverse=True)

        top_1 = list()
        top_3 = list()
        top_5 = list()
        for row in cluster_sim_mat:
            for x, val in enumerate(row):
                if x == 0:
                    continue
                if x <= 1:
                    top_1.append(val)
                if x <= 3:
                    top_3.append(val)
                if x <= 5:
                    top_5.append(val)
                else:
                    break
        top_1.sort(reverse=True)

        top_1_mean = utils.avg(top_1)
        top_1_stddev = utils.stddev(top_1)
        top_3_mean = utils.avg(top_3)
        top_3_stddev = utils.stddev(top_3)
        top_5_mean = utils.avg(top_5)
        top_5_stddev = utils.stddev(top_5)
        print "\n        Mean\t   Std Dev"
        print "Top 1: %3.3f\t %3.3f" % (top_1_mean, top_1_stddev)
        print "Top 3: %3.3f\t %3.3f" % (top_3_mean, top_3_stddev)
        print "Top 5: %3.3f\t %3.3f" % (top_5_mean, top_5_stddev)
        print
        print "List of 10 most similar scores"
        print ", ".join(map(lambda x: "%4.3f" % x, top_1[:10]))

        print
        print
Example #9
	def print_cluster_separation(self):
		print "CLUSTER SEPERATION"
		print
		print "Comparing each Cluster to it's most similar other clusters"

		if len(self.clusters) < 2:
			print "There are less than two clusters"
			return

		cluster_sim_mat = self.confirm.get_cluster_sim_mat()
		for row in cluster_sim_mat:
			row.sort(reverse=True)

		top_1 = list()
		top_3 = list()
		top_5 = list()
		for row in cluster_sim_mat:
			for x, val in enumerate(row):
				if x == 0:
					continue
				if x <= 1:
					top_1.append(val)
				if x <= 3:
					top_3.append(val)
				if x <= 5:
					top_5.append(val)
				else:
					break
		top_1.sort(reverse=True)

		top_1_mean = utils.avg(top_1)
		top_1_stddev = utils.stddev(top_1)
		top_3_mean = utils.avg(top_3)
		top_3_stddev = utils.stddev(top_3)
		top_5_mean = utils.avg(top_5)
		top_5_stddev = utils.stddev(top_5)
		print "\n        Mean\t   Std Dev"
		print "Top 1: %3.3f\t %3.3f" % (top_1_mean, top_1_stddev)
		print "Top 3: %3.3f\t %3.3f" % (top_3_mean, top_3_stddev)
		print "Top 5: %3.3f\t %3.3f" % (top_5_mean, top_5_stddev)
		print
		print "List of 10 most similar scores"
		print ", ".join(map(lambda x: "%4.3f" % x, top_1[:10]))

		print
		print
def report_headers(file3, item):
    cdata = companydata()
    today = datetime.today().strftime('%m/%d/%Y')
    invodate = datetime.today().strftime('%m/%d/%Y')
    ltm, rtm, bump, tb, ctrall, left_ctr, right_ctr, dl, dh, tdl, hls, m1, m2, m3, m4, m5, m6, m7, n1, n2, n3 = reportsettings(
        1)

    dateline = m1 + 8.2 * dl
    mtmp = dateline - 3.5 * dl
    level1 = mtmp + 3.5 * dl

    c = canvas.Canvas(file3, pagesize=letter)
    c.setLineWidth(1)

    c.setFont('Helvetica-Bold', 24, leading=None)
    c.drawCentredString(rtm - 75, dateline + 1.5 * dl, 'Report')
    c.setFont('Helvetica-Bold', 12, leading=None)
    c.drawString(ltm + bump * 3, level1 + bump * 2, f'{item.upper()} Report')
    c.setFont('Helvetica', 12, leading=None)
    c.drawCentredString(rtm - 50, dateline + bump, 'Created')
    #c.drawCentredString(rtm - 37.7, dateline + bump, 'Type')

    vdat = Vehicles.query.filter(Vehicles.DOTNum != None).first()
    dh = 13
    top = level1 - dh
    lft = ltm + bump * 3
    header = list(range(5))
    header[0] = f'This Report Page is the {item.upper()}'
    header[1] = 'Information List for'
    header[2] = f'{cdata[0]}'
    header[3] = f'DOT #{vdat.DOTNum}'
    header[4] = ''
    for ix in header:
        c.drawString(lft, top, ix)
        top = top - dh

    x = avg(rtm - 75, rtm)
    y = dateline - dh - bump
    #c.drawCentredString(x, y, f'{item.upper()}')
    x = avg(rtm - 75, rtm - 150)
    c.drawCentredString(rtm - 50, y, invodate)

    c.showPage()
    c.save()
Example #11
 def _init_user_avg(self):
     """
     Description
         A function which returns the users' average ratings as
         a defaultdict.
     """
     user_avg = defaultdict(int)
     for user in self.users:
         user_avg[user] = avg(self.matrix[user])
     return user_avg
Example #12
 def test_initialization(self):
     dimension = 10
     matrix = [[randint(1, 10) for _i in range(0, dimension)]
               for _c in range(0, dimension)]
     cf = MFExplicitPrepSGD(matrix, lf=4)
     self.assertEqual(len(cf.matrix), dimension)
     elements = [
         element for row in cf.preprocessed_matrix for element in row
     ]
     self.assertAlmostEqual(avg(elements), 0, delta=0.00001)
Example #13
 def _init_item_avg(self):
     """
     Description
         A function which returns the items' average ratings as
         a defaultdict.
     """
     item_avg = defaultdict(int)
     for item in self.items:
         item_avg[item] = avg(self.matrix.col(item))
     return item_avg
Example #14
 def _init_avg_ratings(self):
     """
     Description
         A function which computes and returns users'
         average ratings.
     """
     avg_r = DynamicArray(default_value=lambda: 0)
     for index, user in enumerate(self.matrix):
         avg_r[index] = avg(user)
     return avg_r
Example #15
def simulate(net, config):
    robot, ball, goal = reset()
    global fitness, total_steps, MAX_STEPS, reset_sim
    for step in range(MAX_STEPS):
        # calculate new net inputs
        rotated_center = Vec2d(10.5, 0)
        rotated_center.rotate(robot.angle)
        rotated_center += robot.position

        goal_pos = utils.avg(goal.a, goal.b)
        ball_dist = utils.dist(rotated_center,
                               ball.position)  # robot -> ball dist
        goal_dist = utils.dist(ball.position, goal_pos)  # ball -> goal dist
        ball_dir, goal_dir = utils.get_angles(
            rotated_center, ball.position, goal_pos)  # inputs for neural net
        fitness = utils.calculate_fitness(ball_dist, goal_dist,
                                          robot_touched_ball)

        # scale values for nn
        #ball_dir = np.interp(ball_dir, [0, 360], [0.0, 1.0])
        #goal_dir = np.interp(goal_dir, [0, 360], [0.0, 1.0])
        #ball_dist = np.interp(ball_dist, [0, 303.6], [0.0, 1.0])

        # get input from neural net here; need to calculate ball_dir and goal_dir though
        # NEW INPUTS SHOULD BE: fixed ball dir, fixed ball dist, fixed goal direction, fixed goal distance
        # need to subtract the current heading from the ball dir
        # goal dir should be robot to goal not bloody ball to goal
        # remove int touched ball
        # something else here?
        rotation, speed = net.activate(
            [ball_dir, ball_dist, goal_dir,
             int(robot_touched_ball)])
        rotation = utils.clamp(rotation, -1.0, 1.0)
        speed = utils.clamp(speed, -1.0, 1.0)
        rotation *= 10  # rotation will be in degrees
        speed *= 50  # max speed = 60

        robot.angle += math.radians(rotation)
        robot.velocity = (speed * math.cos(robot.angle - 1.5708),
                          speed * math.sin(robot.angle - 1.5708))

        # step sim based on input
        robot.angular_velocity = 0
        robot.center_of_gravity = (10.5, 10.5)
        space.step(1.0 / 60.0)

        total_steps += 1

        # session was ended from one of the callback listeners, so we know it's got the bonuses already
        if reset_sim:
            reset_sim = False
            return fitness

    # test failed to complete, still subtract total steps
    return fitness - (total_steps / 1.5) + total_steps_touching_ball
def percent_success(actions):
    """Finds the percent of times didSucceed is true in a list of actions.

    actions is the list of actions that can either succeed or fail."""
    successes = [action.get('didSucceed') for action in actions]
    # Returns the integer percentage of times in successes that
    # didSucceed is true. Taking an average of a list of booleans
    # returns a float between 0 and 1 of what percentage of times the
    # value was True.
    # Example: [True, True, False, True] returns 75.
    return round(100 * utils.avg(successes))
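# Usage sketch (assuming utils.avg is a plain arithmetic mean, as in the note
# near the top of this collection), mirroring the docstring example:
# percent_success([{'didSucceed': True}, {'didSucceed': True},
#                  {'didSucceed': False}, {'didSucceed': True}])  # -> 75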
Example #17
    def feature_eval_metrics(self, sim_fun):
        doc_cluster_sims_flat = list()
        doc_cluster_means = list()
        doc_cluster_std_devs = list()
        for cluster in self.clusters:
            cluster_sims = list()
            for _doc in cluster.members:
                val = sim_fun(cluster, _doc)
                doc_cluster_sims_flat.append(val)
                cluster_sims.append(val)
            doc_cluster_means.append(utils.avg(cluster_sims))
            doc_cluster_std_devs.append(utils.stddev(cluster_sims))
        global_mean = utils.avg(doc_cluster_sims_flat)
        global_stddev = utils.stddev(doc_cluster_sims_flat)
        mean_of_means = utils.avg(doc_cluster_means)
        stddev_of_means = utils.stddev(doc_cluster_means)
        mean_of_stddev = utils.avg(doc_cluster_std_devs)
        stddev_of_stddev = utils.stddev(doc_cluster_std_devs)

        return global_mean, global_stddev, mean_of_means, stddev_of_means, mean_of_stddev, stddev_of_stddev
Example #18
	def feature_eval_metrics(self, sim_fun):
		doc_cluster_sims_flat = list()
		doc_cluster_means = list()
		doc_cluster_std_devs = list()
		for cluster in self.clusters:
			cluster_sims = list()
			for _doc in cluster.members:
				val = sim_fun(cluster, _doc)
				doc_cluster_sims_flat.append(val)
				cluster_sims.append(val)
			doc_cluster_means.append(utils.avg(cluster_sims))
			doc_cluster_std_devs.append(utils.stddev(cluster_sims))
		global_mean = utils.avg(doc_cluster_sims_flat)
		global_stddev = utils.stddev(doc_cluster_sims_flat)
		mean_of_means = utils.avg(doc_cluster_means)
		stddev_of_means = utils.stddev(doc_cluster_means)
		mean_of_stddev = utils.avg(doc_cluster_std_devs)
		stddev_of_stddev = utils.stddev(doc_cluster_std_devs)

		return global_mean, global_stddev, mean_of_means, stddev_of_means, mean_of_stddev, stddev_of_stddev
Example #19
	def print_cluster_cohesion(self):
		print "CLUSTER COHESION:"
		sim_names = self.clusters[0].members[0].get_feature_set_names()[:]
		sim_names.append("confirm")
		print "\t\t%s     SIZE" % ("        ".join(sim_names))
		for x, cluster in enumerate(self.clusters):
			# list of lists
			similarities = map(lambda _doc: _doc.global_sim(cluster.center), cluster.members)
			to_print = list()
			for y in xrange(len(similarities[0])):
				values = map(lambda row: row[y], similarities)
				to_print.append(utils.avg(values))
				to_print.append(utils.stddev(values))
			values = map(lambda _doc: self.confirm.cluster_doc_similarity(cluster, _doc), cluster.members)
			to_print.append(utils.avg(values))
			to_print.append(utils.stddev(values))
			l = len(cluster.members)
			print "\t%s:  %s  %d" % (x, "  ".join(map(lambda s: "%3.2f" % s, to_print)), l)
		print
		print
Example #20
    def update(self, task_info, new_runtime):
        func = task_info['function_id']
        end = task_info['endpoint_id']
        group = self.endpoints[end]['group']

        while len(self.runtimes[func][group].queue) > self.last_n:
            self.runtimes[func][group].get()
        self.runtimes[func][group].put(new_runtime)
        self.avg_runtime[func][group] = avg(self.runtimes[func][group])

        self.num_executions[func][group] += 1
Example #21
def calculate_avg_cycle_time(cycles):
    """Calculates the average time for an action based on start and end times.

    Finds the time difference between each action pair passed and
    returns the average of the differences.

    cycles is a list of tuples where the first action in the tuple is
    the intake, and the second item is the placement or drop."""
    cycle_times = []
    for cycle in cycles:
        cycle_times.append(cycle[0].get('time') - cycle[1].get('time'))
    return utils.avg(cycle_times, None)
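# Usage sketch, assuming timestamps count down during a match (so the intake
# time is larger than the placement time); the dicts below are hypothetical:
# cycles = [({'time': 120.0}, {'time': 112.0}), ({'time': 90.0}, {'time': 84.0})]
# calculate_avg_cycle_time(cycles)  # -> 7.0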
 def get_speed(self, condition, sub_name):
     """
     >>> d = get_simple_test_dir() + os.sep + "test_network1"
     >>> r = ReadReports(d)
     >>> info = GetProcessedInformation(r)
     >>> info.get_speed("normal","singles") == [4.75, 4, 5]
     True
     """
     # {'host7': 5, 'host6': 4, 'host5': 5, 'host4': 5, 'host3': 4, 'host2': 5, 'host1': 5, 'host8': 5}
     null_io = self.information.get_last_non_null_io(condition, sub_name)
     # print self.information.get_null_io(condition, sub_name)
     l = simple_dict_to_list(null_io)
     return [avg(l), min(l), max(l)]
Example #23
 def saving():
     begin_time = datetime.now() - dt
     round_time = begin_time.replace(
         minute=30 * (begin_time.minute // 30)).strftime('%H:%M')
     try:
         kits = kitchens_at(round_time)
     except MissingDBTimeError:
         return
     new_kits = []
     for kit in kits:
         if kit.name == 'Cadillac':  # Only Cadillac is live for now, do not update the rest
             dkit = kit._asdict()
             dkit['people'] = avg(people)
             dkit['fruits'] = {
                 fruit: avg(values)
                 for fruit, values in fruits_history.items()
             }
             new_kits.append(Kitchen(**dkit))
     kitchens_add_history(LiveInfo(round_time, new_kits))
     people.clear()
     for fruit in fruits_history.values():
         fruit.clear()
def consolidate_nums(nums):
    """Given numbers reported by multiple scouts, estimates actual number

    nums is a list of numbers, representing action counts or times, reported by each scout
    Currently tries to consolidate using only the reports from scouts on one robot,
    but future improvements might change the algorithm to account for other alliance members,
    since TBA can give us the total action counts for the alliance
    """
    mean = utils.avg(nums)
    if mean in nums or len(nums) == 0:
        # Avoid getting a divide by zero error when calculating standard deviation
        return round(mean)
    # If two or more scouts agree, automatically go with what they say
    if len(nums) > len(set(nums)):
        # Still need to consolidate, in case there are multiple modes
        return consolidate_nums(modes(nums))
    # Population standard deviation:
    std_dev = statistics.pstdev(nums)
    # Calculate weighted average, where the weight for each num is its reciprocal square z-score
    # That way, we account less for data farther from the mean
    z_scores = [(num - mean) / std_dev for num in nums]
    weights = [1 / z**2 for z in z_scores]
    float_nums = utils.avg(nums, weights)
    return round(float_nums)
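# Usage sketch with hypothetical scout reports (assumes the modes helper
# returns the list of most common values):
# consolidate_nums([3])        # mean equals the single report       -> 3
# consolidate_nums([4, 4, 7])  # two scouts agree, the mode wins     -> 4
# consolidate_nums([2, 4, 9])  # no agreement: z-score-weighted mean, rounded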
Example #25
def history():
    """
    AVG availability of seats overall for a time interval - use for a graph
    :return: [{timeInterval: '2:30', avg:100}]
    """
    live_data = people_counting.get_current()
    kitchens_avg = {
        hour: avg([koch.empty_seats for koch in kitchens_at(hour)])
        for hour in cfg.hours
    }
    if live_data.time in cfg.hours:
        kitchens_avg.update({
            live_data.time:
            avg([koch.empty_seats for koch in live_data.kitchens])
        })

    return jsonify({
        'time':
        live_data.time,
        'kitchens': [{
            'time': time,
            'avg': kitchens_avg
        } for time, kitchens_avg in kitchens_avg.items()]
    })
Example #26
 def print_cluster_cohesion(self):
     print "CLUSTER COHESION:"
     sim_names = self.clusters[0].members[0].get_feature_set_names()[:]
     sim_names.append("confirm")
     print "\t\t%s     SIZE" % ("        ".join(sim_names))
     for x, cluster in enumerate(self.clusters):
         # list of lists
         similarities = map(lambda _doc: _doc.global_sim(cluster.center),
                            cluster.members)
         to_print = list()
         for y in xrange(len(similarities[0])):
             values = map(lambda row: row[y], similarities)
             to_print.append(utils.avg(values))
             to_print.append(utils.stddev(values))
         values = map(
             lambda _doc: self.confirm.cluster_doc_similarity(
                 cluster, _doc), cluster.members)
         to_print.append(utils.avg(values))
         to_print.append(utils.stddev(values))
         l = len(cluster.members)
         print "\t%s:  %s  %d" % (x, "  ".join(
             map(lambda s: "%3.2f" % s, to_print)), l)
     print
     print
Example #27
    def predict(self, user_id, item_id):
        """
        Description:
            Returns a postprocessed prediction of a rating.

        Arguments:
            :param user_id: The user identifier.
            :type user_id: int
            :param item_id: The item identifier.
            :type item_id: int
        """
        if self.matrix[user_id][item_id] is None:
            nbs = self.neighborhood_of(user_id)
            nbs_ratings = [self.matrix[u_id][item_id] for u_id in nbs]
            return avg(nbs_ratings)
        else:
            return self.matrix[user_id][item_id]
Example #28
    def recommend(self, user_id, n_rec):
        """
        Description
            A function which returns recommendations for a user.

        Arguments
            :param user_id: The user identifier.
            :type user_id: int
            :param n_rec: The number of items to recommend.
            :type n_rec: int
        """
        item_ids = [i for i in range(0, len(self.matrix[user_id]))
                    if self.matrix[user_id][i] is None]
        nbs = self.neighborhood_of(user_id)
        nbs_predictions = {
            i: [self.predict(n, i) for n in nbs] for i in item_ids}
        predictions = {
            key: avg(nbs_predictions[key]) for key in nbs_predictions}
        return sorted(
            item_ids,
            key=lambda item_id: predictions[item_id])[:-n_rec]
Example #29
def history_saver():
    from kitchen_detection.people_counting import LiveInfo
    from statistics import mode

    people = []
    fruits_history = {fruit: [] for fruit in cfg.fruit_types}
    dt = timedelta(minutes=30)

    def saving():
        begin_time = datetime.now() - dt
        round_time = begin_time.replace(
            minute=30 * (begin_time.minute // 30)).strftime('%H:%M')
        try:
            kits = kitchens_at(round_time)
        except MissingDBTimeError:
            return
        new_kits = []
        for kit in kits:
            if kit.name == 'Cadillac':  # Only Cadillac is live for now, do not update the rest
                dkit = kit._asdict()
                dkit['people'] = avg(people)
                dkit['fruits'] = {
                    fruit: avg(values)
                    for fruit, values in fruits_history.items()
                }
                new_kits.append(Kitchen(**dkit))
        kitchens_add_history(LiveInfo(round_time, new_kits))
        people.clear()
        for fruit in fruits_history.values():
            fruit.clear()

    schedule.every().minute.at(":30").do(saving)
    while True:
        people.append(mode(people_buffer))
        for fruit, history in fruits_history.items():
            history.append(avg(fruit_buffer[fruit]))
        schedule.run_pending()
        sleep(60)
Example #30
def get_current() -> LiveInfo:
    kitchens = []
    for kitchen in cfg.kitchens:
        # Only Cadillac is live now, fake the rest
        if kitchen['name'] == 'Cadillac':
            people = mode(kitchen_detection.people_live.people_buffer)
            fruits = {
                fruit: avg(history)
                for fruit, history in
                kitchen_detection.people_live.fruit_buffer.items()
            }
        else:
            people = kitchen['seats'] / 2
            fruits = {'apple': 1}

        kitchens.append(
            Kitchen(kitchen['name'], kitchen['seats'], kitchen['floor'],
                    people, fruits))

    begin_time = datetime.now()
    round_time = begin_time.replace(
        minute=30 * (begin_time.minute // 30)).strftime('%H:%M')
    return LiveInfo(round_time, kitchens)
def calculate_obj_team(team):
    """Calculate data for given team using objective calculated TIMs"""
    team_info = {}
    # list of TIMs that the team has been in:
    tims = local_database_communicator.read_dataset('processed.calc_obj_tim', team_number=team)
    # Calculate averages
    for calculation, schema in SCHEMA['averages'].items():
        # Find tims that meet required data field:
        tim_action_counts = []
        for tim in tims:
            # Gets the total number of actions for a single tim
            tim_action_counts.append(sum(
                [tim[tim_field] for tim_field in schema['tim_fields']]))
        if schema['type'] in ['int', 'float']:
            average = utils.avg(tim_action_counts)
            average = STR_TYPES[schema['type']](average)
        else:
            raise TypeError(f'{calculation} should be a number in calc obj team schema')
        team_info[calculation] = average
    # Calculate counts
    for calculation, schema in SCHEMA['counts'].items():
        tims_that_meet_filter = tims
        for key, value in schema['tim_fields'].items():
            if key == 'not':
                # not_field expects the output to be anything but the given filter
                # not_value is the filter that not_field shouldn't have
                for not_field, not_value in value.items():
                    # Checks that the TIMs in the 'not' field are anything other than the filter
                    tims_that_meet_filter = list(filter(lambda tim: tim.get(
                        not_field, not_value) != not_value, tims_that_meet_filter))
            else:
                # Checks that the TIMs in their given field meet the filter
                tims_that_meet_filter = list(filter(
                    lambda tim: tim[key] == value, tims_that_meet_filter))
        team_info[calculation] = STR_TYPES[schema['type']](len(tims_that_meet_filter))
    return team_info
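# Hedged sketch of the SCHEMA dict shape that calculate_obj_team expects; the
# calculation and field names below are illustrative, not the project's real
# schema file:
# SCHEMA = {
#     'averages': {
#         'avg_balls_scored': {'type': 'float',
#                              'tim_fields': ['balls_low', 'balls_high']},
#     },
#     'counts': {
#         'matches_incap': {'type': 'int', 'tim_fields': {'incap': True}},
#     },
# }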
Example #32
    def test_optimality(self,
                        learner,
                        partition='train',
                        debug_idxs=None,
                        skip_idxs=(),
                        decoder='lp+mst',
                        streaming=False,
                        overwritten_params=(),
                        **kwargs):
        """Note the proportion of optimal solutions when approximating.
        """
        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder=decoder,
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        approx_token_solns, approx_dep_solns = [], []
        for instance in eval_instances:
            if instance.decoder.has_solution():
                approx_token_solns.append(
                    [tuple(idx) for idx in instance.output_idxs])
                approx_dep_solns.append(
                    instance.get_dep_tuples(instance.output_sent,
                                            parse_type='outtree'))
            else:
                approx_token_solns.append([])
                approx_dep_solns.append([])
            del instance.decoder

        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder='ilp',
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        exact_token_solns, exact_dep_solns = [], []
        for instance in eval_instances:
            if instance.decoder.has_solution():
                exact_token_solns.append(
                    [tuple(idx) for idx in instance.output_idxs])
                exact_dep_solns.append(
                    instance.get_dep_tuples(instance.output_sent,
                                            parse_type='outtree'))
            else:
                exact_token_solns.append([])
                exact_dep_solns.append([])

        token_optimality, dep_optimality = [], []
        num_correct_tokens, num_total_tokens = [], []
        num_correct_deps, num_total_deps = [], []
        num_failed_approx, num_failed_exact, num_succeeded = 0, 0, 0
        for approx_tokens, approx_deps, exact_tokens, exact_deps in zip(
                approx_token_solns, approx_dep_solns, exact_token_solns,
                exact_dep_solns):
            if len(approx_tokens) == 0:
                num_failed_approx += 1
            if len(exact_tokens) == 0:
                num_failed_exact += 1
            if len(approx_tokens) == 0 or len(exact_tokens) == 0:
                continue
            else:
                num_succeeded += 1

            assert len(approx_tokens) == len(exact_tokens)

            token_overlap = set(approx_tokens).intersection(exact_tokens)
            token_optimality.append(
                int(len(token_overlap) == len(exact_tokens)))
            num_correct_tokens.append(len(token_overlap))
            num_total_tokens.append(len(exact_tokens))

            dep_overlap = set(approx_deps).intersection(exact_deps)
            dep_optimality.append(int(len(dep_overlap) == len(exact_deps)))
            num_correct_deps.append(len(dep_overlap))
            num_total_deps.append(len(exact_deps))

        print "%d/%d (%.1f%%) optimal token solutions%s" % \
                (sum(token_optimality),
                 num_succeeded,
                 avg(token_optimality) * 100,
                 "; %d approx failed, %d exact failed" % \
                         (num_failed_approx, num_failed_exact)
                         if num_succeeded < len(eval_instances) else "")
        print "token optimality rate: %.1f%% over %d instances, " \
                                     "%.1f%% over %d tokens" % \
                (avg(correct/total * 100
                    for correct, total in zip(num_correct_tokens,
                                              num_total_tokens)),
                 num_succeeded,
                 sum(num_correct_tokens)/sum(num_total_tokens) * 100,
                 sum(num_total_tokens))
        print
        print "%d/%d (%.1f%%) optimal dep solutions%s" % \
                (sum(dep_optimality),
                 num_succeeded,
                 avg(dep_optimality) * 100,
                 "; %d approx failed, %d exact failed" % \
                         (num_failed_approx, num_failed_exact)
                         if num_succeeded < len(eval_instances) else "")
        print "dep optimality rate: %.1f%% over %d instances, " \
                                     "%.1f%% over %d deps" % \
                (avg(correct/total * 100
                    for correct, total in zip(num_correct_deps,
                                              num_total_deps)),
                 num_succeeded,
                 sum(num_correct_deps)/sum(num_total_deps) * 100,
                 sum(num_total_deps))
Example #33
    def test_tightness(self, learner, partition='train', debug_idxs=None,
            skip_idxs=(), decoder='ilp', streaming=False,
            overwritten_params=(),**kwargs):
        """Note the proportion of integral solutions to LPs.
        """
        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder='ilp',
                                               relax=True,
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        print "idx\tsize\twords\toptwrds\tequiv?\tdeps\toptdeps"
        num_failed, num_tight, num_loose = 0, 0, 0
        token_tightness, dep_tightness = [], []
        for i, instance in enumerate(eval_instances):
            if not instance.decoder.has_solution():
                num_failed += 1
            elif instance.decoder.has_integral_solution(ndigits=3):
                num_tight += 1
                token_tightness.append(1)
                dep_tightness.append(1)
            else:
                num_loose += 1

                # Print some additional statistics for the loose ones
                relaxed = instance.decoder.get_integrality()
                instance.decoder.solve(relax=False)
                optimal = instance.decoder.get_integrality()

                # Ensure that the optimal result is integral for sanity
                for var_type in optimal:
                    assert len(optimal[var_type][0]) == 0

                # Check whether the relaxed word solution is the same as the
                # optimal solution, even if it's non-integral
                is_equiv = sorted([var_tuple[0].idx() for var_tuple in
                                  relaxed['word'][0] + relaxed['word'][1]]) \
                    == sorted([var_tuple[0].idx() for var_tuple in
                              optimal['word'][1]])

                print "%d:\t%d\t%d/%d\t%d\t%s\t%d/%d\t%d" % \
                        (i,
                         sum(sent.length for sent in instance.input_sents),
                         len(relaxed['word'][0]),
                         len(relaxed['word'][0]) + len(relaxed['word'][1]),
                         len(optimal['word'][1]),
                         '' if is_equiv else '!',
                         len(relaxed['dep'][0]),
                         len(relaxed['dep'][0]) + len(relaxed['dep'][1]),
                         len(optimal['dep'][1]),
                         )

                token_tightness.append(len(relaxed['word'][1]) / \
                        (len(relaxed['word'][0]) + len(relaxed['word'][1])))
                dep_tightness.append(len(relaxed['dep'][1]) / \
                        (len(relaxed['dep'][0]) + len(relaxed['dep'][1])))

                # If restricted to a few instances, print the details
                if len(eval_instances) < 3:
                    for feat_cat in ('word', 'dep'):
                        for integrality in (0,1):
                            for rel_tuple in relaxed[feat_cat][integrality]:
                                var, relaxed_value = rel_tuple
                                optimal_value = None
                                for opt_tuple in optimal[feat_cat][1]:
                                    if opt_tuple[0].idx() == var.idx():
                                        optimal_value = opt_tuple[1]
                                        break
                                print "%s\t%.3f\t%s" % \
                                        (var.readable_grounding(),
                                         relaxed_value, optimal_value)

        print "%d/%d (%.1f%%) integral solutions%s" % \
                (num_tight,
                 num_tight + num_loose,
                 (num_tight * 100) / float(num_tight + num_loose),
                 "; %d failed" % (num_failed,) if num_failed > 0 else "")
        print "token integrality rate: %.1f%%" % (avg(token_tightness) * 100,)
        print "dep integrality rate: %.1f%%" % (avg(dep_tightness) * 100,)
Example #34
    def test_tightness(self,
                       learner,
                       partition='train',
                       debug_idxs=None,
                       skip_idxs=(),
                       decoder='ilp',
                       streaming=False,
                       overwritten_params=(),
                       **kwargs):
        """Note the proportion of integral solutions to LPs.
        """
        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder='ilp',
                                               relax=True,
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        print "idx\tsize\twords\toptwrds\tequiv?\tdeps\toptdeps"
        num_failed, num_tight, num_loose = 0, 0, 0
        token_tightness, dep_tightness = [], []
        for i, instance in enumerate(eval_instances):
            if not instance.decoder.has_solution():
                num_failed += 1
            elif instance.decoder.has_integral_solution(ndigits=3):
                num_tight += 1
                token_tightness.append(1)
                dep_tightness.append(1)
            else:
                num_loose += 1

                # Print some additional statistics for the loose ones
                relaxed = instance.decoder.get_integrality()
                instance.decoder.solve(relax=False)
                optimal = instance.decoder.get_integrality()

                # Ensure that the optimal result is integral for sanity
                for var_type in optimal:
                    assert len(optimal[var_type][0]) == 0

                # Check whether the relaxed word solution is the same as the
                # optimal solution, even if it's non-integral
                is_equiv = sorted([var_tuple[0].idx() for var_tuple in
                                  relaxed['word'][0] + relaxed['word'][1]]) \
                    == sorted([var_tuple[0].idx() for var_tuple in
                              optimal['word'][1]])

                print "%d:\t%d\t%d/%d\t%d\t%s\t%d/%d\t%d" % \
                        (i,
                         sum(sent.length for sent in instance.input_sents),
                         len(relaxed['word'][0]),
                         len(relaxed['word'][0]) + len(relaxed['word'][1]),
                         len(optimal['word'][1]),
                         '' if is_equiv else '!',
                         len(relaxed['dep'][0]),
                         len(relaxed['dep'][0]) + len(relaxed['dep'][1]),
                         len(optimal['dep'][1]),
                         )

                token_tightness.append(len(relaxed['word'][1]) / \
                        (len(relaxed['word'][0]) + len(relaxed['word'][1])))
                dep_tightness.append(len(relaxed['dep'][1]) / \
                        (len(relaxed['dep'][0]) + len(relaxed['dep'][1])))

                # If restricted to a few instances, print the details
                if len(eval_instances) < 3:
                    for feat_cat in ('word', 'dep'):
                        for integrality in (0, 1):
                            for rel_tuple in relaxed[feat_cat][integrality]:
                                var, relaxed_value = rel_tuple
                                optimal_value = None
                                for opt_tuple in optimal[feat_cat][1]:
                                    if opt_tuple[0].idx() == var.idx():
                                        optimal_value = opt_tuple[1]
                                        break
                                print "%s\t%.3f\t%s" % \
                                        (var.readable_grounding(),
                                         relaxed_value, optimal_value)

        print "%d/%d (%.1f%%) integral solutions%s" % \
                (num_tight,
                 num_tight + num_loose,
                 (num_tight * 100) / float(num_tight + num_loose),
                 "; %d failed" % (num_failed,) if num_failed > 0 else "")
        print "token integrality rate: %.1f%%" % (avg(token_tightness) * 100, )
        print "dep integrality rate: %.1f%%" % (avg(dep_tightness) * 100, )
Example #35
    def test_optimality(self, learner, partition='train', debug_idxs=None,
            skip_idxs=(), decoder='lp+mst', streaming=False,
            overwritten_params=(),**kwargs):
        """Note the proportion of optimal solutions when approximating.
        """
        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder=decoder,
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        approx_token_solns, approx_dep_solns = [], []
        for instance in eval_instances:
            if instance.decoder.has_solution():
                approx_token_solns.append([tuple(idx)
                                        for idx in instance.output_idxs])
                approx_dep_solns.append(instance.get_dep_tuples(
                                        instance.output_sent,
                                        parse_type='outtree'))
            else:
                approx_token_solns.append([])
                approx_dep_solns.append([])
            del instance.decoder

        eval_instances = self.decode_instances(learner,
                                               partition=partition,
                                               debug_idxs=debug_idxs,
                                               skip_idxs=skip_idxs,
                                               decoder='ilp',
                                               streaming=False,  # keep the LP
                                               overwritten_params=\
                                                       overwritten_params,
                                               **kwargs)

        exact_token_solns, exact_dep_solns = [], []
        for instance in eval_instances:
            if instance.decoder.has_solution():
                exact_token_solns.append([tuple(idx)
                                        for idx in instance.output_idxs])
                exact_dep_solns.append(instance.get_dep_tuples(
                                        instance.output_sent,
                                        parse_type='outtree'))
            else:
                exact_token_solns.append([])
                exact_dep_solns.append([])

        token_optimality, dep_optimality = [], []
        num_correct_tokens, num_total_tokens = [], []
        num_correct_deps, num_total_deps = [], []
        num_failed_approx, num_failed_exact, num_succeeded = 0, 0, 0
        for approx_tokens, approx_deps, exact_tokens, exact_deps in zip(
                approx_token_solns, approx_dep_solns,
                exact_token_solns, exact_dep_solns):
            if len(approx_tokens) == 0:
                num_failed_approx += 1
            if len(exact_tokens) == 0:
                num_failed_exact += 1
            if len(approx_tokens) == 0 or len(exact_tokens) == 0:
                continue
            else:
                num_succeeded += 1

            assert len(approx_tokens) == len(exact_tokens)

            token_overlap = set(approx_tokens).intersection(exact_tokens)
            token_optimality.append(
                    int(len(token_overlap) == len(exact_tokens)))
            num_correct_tokens.append(len(token_overlap))
            num_total_tokens.append(len(exact_tokens))

            dep_overlap = set(approx_deps).intersection(exact_deps)
            dep_optimality.append(int(len(dep_overlap) == len(exact_deps)))
            num_correct_deps.append(len(dep_overlap))
            num_total_deps.append(len(exact_deps))

        print "%d/%d (%.1f%%) optimal token solutions%s" % \
                (sum(token_optimality),
                 num_succeeded,
                 avg(token_optimality) * 100,
                 "; %d approx failed, %d exact failed" % \
                         (num_failed_approx, num_failed_exact)
                         if num_succeeded < len(eval_instances) else "")
        print "token optimality rate: %.1f%% over %d instances, " \
                                     "%.1f%% over %d tokens" % \
                (avg(correct/total * 100
                    for correct, total in zip(num_correct_tokens,
                                              num_total_tokens)),
                 num_succeeded,
                 sum(num_correct_tokens)/sum(num_total_tokens) * 100,
                 sum(num_total_tokens))
        print
        print "%d/%d (%.1f%%) optimal dep solutions%s" % \
                (sum(dep_optimality),
                 num_succeeded,
                 avg(dep_optimality) * 100,
                 "; %d approx failed, %d exact failed" % \
                         (num_failed_approx, num_failed_exact)
                         if num_succeeded < len(eval_instances) else "")
        print "dep optimality rate: %.1f%% over %d instances, " \
                                     "%.1f%% over %d deps" % \
                (avg(correct/total * 100
                    for correct, total in zip(num_correct_deps,
                                              num_total_deps)),
                 num_succeeded,
                 sum(num_correct_deps)/sum(num_total_deps) * 100,
                 sum(num_total_deps))
Example #36
    def check_dep_coverage(self,
                           partition='train',
                           debug_idxs=None,
                           skip_idxs=(),
                           var_conf=None):
        """Record the fraction of potential arcs that are present in gold
        trees.
        """
        var_flags = variables.TransductionVariables.parse_var_conf(var_conf)
        tgt_instances = self.get_instances(partition=partition,
                                           debug_idxs=debug_idxs,
                                           skip_idxs=skip_idxs)
        prev_average_overlap = None
        print "ancestor_limit\tavg_overlap_rate\tavg_reachability"
        for ancestor_limit in range(30):
            ancestor_limit = None if ancestor_limit == 0 \
                                  else ancestor_limit
            instance_overlaps = []
            instance_reachability = []

            for instance in tgt_instances:
                # TODO: merge with instance.get_overlap()
                sent_dep_tuples = instance.get_constrained_dep_tuples(
                    instance.sentences[0],
                    original_tree=var_flags['orig_deps'],
                    ancestor_dags=var_flags['anc_deps'],
                    pos_matching=var_flags['pos_deps'],
                    noninverted_deps=var_flags['noninv_deps'],
                    fixed_root=var_flags['fixed_root'],
                    verb_root=var_flags['verb_root'],
                    ancestor_limit=ancestor_limit)
                gold_overlaps = []
                for gold_sent in instance.gold_sentences:
                    gold_dep_tuples = instance.get_dep_tuples(
                        gold_sent, parse_type='dparse')
                    gold_dep_tuple_set = set(gold_dep_tuples)
                    overlap = gold_dep_tuple_set.intersection(sent_dep_tuples)
                    gold_overlaps.append(len(overlap) / \
                                         len(gold_dep_tuple_set))

#                    if len(overlap) < len(gold_dep_tuple_set):
#                        print instance.get_display_string()
#                        print gold_dep_tuple_set - overlap

                instance_overlaps.append(avg(gold_overlaps))
                instance_reachability.append(int(min(gold_overlaps) == 1))

            average_overlap = avg(instance_overlaps)
            average_reachability = avg(instance_reachability)
            if average_overlap == prev_average_overlap:
                continue
            prev_average_overlap = average_overlap

            print ancestor_limit, '\t\t', average_overlap,
            print '\t\t', average_reachability
            print '\t\t\t', sum(instance_overlaps),
            print '\t\t', sum(instance_reachability)
            print '\t\t\t', len(instance_overlaps),
            print '\t\t', len(instance_reachability)
            if not var_flags['anc_deps'] or \
                    (ancestor_limit > 0 and average_overlap == 1.0):
                break
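# NOTE: variables.TransductionVariables.parse_var_conf() is not shown in this
# collection; check_dep_coverage() only assumes it returns a dict of boolean
# constraint flags. A hypothetical example of that shape, with each key named
# after the keyword argument it feeds above:
example_var_flags = {
    'orig_deps': True,     # passed as original_tree
    'anc_deps': True,      # passed as ancestor_dags
    'pos_deps': False,     # passed as pos_matching
    'noninv_deps': False,  # passed as noninverted_deps
    'fixed_root': True,    # passed as fixed_root
    'verb_root': False,    # passed as verb_root
}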
Beispiel #37
0
def plot_intervals(output_folder):
    from parsers import CVOutputParser
    from preprocessing import Preprocessor
    from utils import avg
    import os
    import math
    """ 
    Given a cross validation ouput. Certain triple intervals can be plottet
    to compare the error for extrapolation, max ent and the heurestic.
    
    The algorithm runs through each triple interval, and then for each sampled estiamte output
    the triples in the interval are looked up in each sample and the MAPE error is 
    recorded and the average errors are added. And the average of these averages
    are then plottet for each interval.

    """
    if not output_folder[-1] == '/':
        output_folder += '/'
    intervals = 30
    triple_intervals = Preprocessor.triple_intervals(output_folder + 'observed_frequent_items.out', intervals=intervals)

    avg_max_ent_errors = []
    avg_ext_errors = []
    avg_heu_errors = []
    pair_triple_ratios = [i/10. for i in range(11)] # binned ratios [0.0 to 1.0]
    max_ent_ratio_error = [0 for i in range(11)]
    ext_ratio_error = [0 for i in range(11)]

    for index, triple_interval in enumerate(triple_intervals):
        print 'Triple interval {} of {}'.format(index, intervals)
        iteration = 0
        MAPE_avg_errors = []
        MAPE_avg_errors_ext = []
        # MAPE_avg_errors_heu = []
        while True:
            max_ent_est_file = output_folder + str(iteration) + '_data.tsv'
            ext_est_file = output_folder + str(iteration) + '_data_extrapolation.tsv'
            # heu_est_file = output_folder + str(iteration) + '_data_heurestic.tsv'
            # read baseline also?
            # Read until we do not find an output file
            if not os.path.exists(max_ent_est_file):
                break

            max_ent_est = CVOutputParser.read_est_obs_file(max_ent_est_file)
            ext_est = CVOutputParser.read_est_obs_file(ext_est_file)
            # heu_est = CVOutputParser.read_est_obs_file(heu_est_file)

            MAPE_errors = []
            MAPE_errors_ext = []
            # MAPE_errors_heu = []

            for triple in triple_interval:
                # Check that the triple has been estimated
                if triple in max_ent_est:

                    # Index 1 should hold the observed value parsed from the
                    # file; it is the same for every estimate, so just read it once.
                    obs = max_ent_est[triple][1]

                    # maxent estimate
                    est = max_ent_est[triple][0]

                    # extrapolation estimate
                    est2 = ext_est[triple][0]

                    # # independence estimate?

                    # heuristic: use max_ent for triples that occur 0 times in the sample
                    # est4 = heu_est[triple][0]

                    # Index 2 should hold the pair-triple ratio;
                    # it is the same for every estimate.
                    ratio = max_ent_est[triple][2]
                    # bin the ratio to one decimal
                    ratio_binned = round(ratio, 1)
                    # add errors to the ratio
                    max_ent_ratio_error[pair_triple_ratios.index(ratio_binned)] += abs(est-obs) / float(obs)
                    ext_ratio_error[pair_triple_ratios.index(ratio_binned)] += abs(est2-obs) / float(obs)


                    # MAPE error, max ent
                    error = abs(obs - est) / float(obs) * 100
                    MAPE_errors.append(error)

                    # MAPE error, extrapolation
                    error2 = abs(obs - est2) / float(obs) * 100
                    MAPE_errors_ext.append(error2)

                    # MAPE error independence?

                    # MAPE error, heuristic (needs the commented heu_est above)
                    # error4 = abs(obs - est4) / float(obs) * 100
                    # MAPE_errors_heu.append(error4)

                    

                    # MAPE baseline error?
            MAPE_avg_errors.append(avg(MAPE_errors))
            MAPE_avg_errors_ext.append(avg(MAPE_errors_ext))
            # MAPE_avg_errors_heu.append(avg(MAPE_errors_heu))
            iteration += 1

        avg_max_ent_errors.append(avg(MAPE_avg_errors))
        avg_ext_errors.append(avg(MAPE_avg_errors_ext))
        # avg_heu_errors.append(avg(MAPE_avg_errors_heu))
        

    plot(range(len(avg_max_ent_errors)), avg_max_ent_errors, color='blue')
    plot(range(len(avg_ext_errors)), avg_ext_errors, color='red')
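# NOTE: a small worked example of the ratio binning used in plot_intervals()
# above (a standalone sketch, independent of any CV output files): a
# pair/triple ratio is rounded to one decimal and mapped to one of the eleven
# bins 0.0, 0.1, ..., 1.0.
pair_triple_ratios = [i / 10. for i in range(11)]
ratio_binned = round(0.37, 1)                       # -> 0.4
bin_index = pair_triple_ratios.index(ratio_binned)  # -> 4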
Beispiel #38
0
def triple_errors(output_folder, triple):
    from parsers import CVOutputParser
    from utils import interpolate, avg, confidence_interval
    import math
    from collections import Counter
    import os

    """ 
    Plot accumulated errors for estimators against pair triple ratios.
    Ratios are binned in the range 0.0 to 1.0.
    """
    if not output_folder[-1] == "/":
        output_folder += "/"

    iteration = -1
    max_ent_errors = []
    ext_errors = []
    max_ent_abs_errors = []
    ext_abs_errors = []
    samples_ignored = 0
    while True:
        iteration += 1
        max_ent_est_file = output_folder + str(iteration) + "_data.tsv"
        ext_est_file = output_folder + str(iteration) + "_data_extrapolation.tsv"
        # heu_est_file = output_folder + str(iteration) + '_data_heurestic.tsv'
        # read baseline also?
        # Read until we do not find an output file
        if not os.path.exists(max_ent_est_file):
            break

        # Read the maxent estimate
        found = False
        for sample_triple, (est, obs, ratio, triangle) in CVOutputParser.read_est_obs_file_disc_version_2(
            max_ent_est_file
        ):
            (s1, s2, s3, s12, s13, s23, s123) = triangle

            if sample_triple == triple:
                # if s123 == 0:
                #     break
                found = True
                max_ent_errors.append(est - obs)
                max_ent_abs_errors.append(abs(obs - est))
                break

        if not found:
            samples_ignored += 1
            continue

        for sample_triple, (est, obs, ratio, triangle) in CVOutputParser.read_est_obs_file_disc_version_2(ext_est_file):
            (s1, s2, s3, s12, s13, s23, s123) = triangle

            if sample_triple == triple:
                ext_errors.append(est - obs)
                ext_abs_errors.append(abs(obs - est))
                break

    # maxent confidence interval
    maxent_ci = confidence_interval(max_ent_errors)
    # extrapolation confidence interval
    ext_ci = confidence_interval(ext_errors)

    print "samples ignored: ", samples_ignored
    print "maxent avg error: ", round(avg(max_ent_errors), 1)
    print "maxent 95% confidence interval: ", (round(maxent_ci[0], 1), round(maxent_ci[1], 2))
    print "extrapolation avg error: ", round(avg(ext_errors), 1)
    print "extrapolation 95% confidence interval: ", (round(ext_ci[0], 1), round(ext_ci[1], 2))

    # round
    max_ent_errors_rounded = [round(x, 1) for x in max_ent_errors]
    ext_errors_rounded = [round(x, 1) for x in ext_errors]

    # plot
    xlabel("Estimate error")
    ylabel("Bucket size")
    # text(0.1, 0.8, 'Maxent')
    # text(0.1, 0.7, 'avg. error: ' + str(avg(max_ent_errors)))
    # text(0.1, 0.6, '95% conf. interval: ' + str(maxent_ci))

    # text(0.5, 0.8, 'Extrapolation')
    # text(0.5, 0.7, 'avg. error: ' + str(avg(ext_errors)))
    # text(0.5, 0.6, '95% conf. interval: ' + str(ext_ci))

    hist([max_ent_errors_rounded, ext_errors_rounded], color=("b", "r"))

    return max_ent_errors, max_ent_abs_errors, ext_errors, ext_abs_errors
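# NOTE: utils.confidence_interval is not shown in this collection. A minimal
# sketch of a 95% confidence interval for the mean under a normal
# approximation (hypothetical; the real helper may use a t-distribution or
# other conventions):
import math

def confidence_interval_sketch(values, z=1.96):
    values = list(values)
    n = len(values)
    mean = sum(values) / float(n)
    # Unbiased sample variance; requires at least two values.
    var = sum((v - mean) ** 2 for v in values) / float(n - 1)
    half_width = z * math.sqrt(var / n)
    return mean - half_width, mean + half_width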
Beispiel #39
0
def min_max_avg(array):
    """
    Return the min, max, and avg of the given array.
    """
    return min(array), max(array), avg(array)

worst = increases[-1]
print_delta('worst:    ', worst)

n_01p = int(round(len(increases) / 100))  # Worst 1 percentile
if n_01p == 0: n_01p = 1
worst_01p = increases[-n_01p]
print_delta('worst  1%:', worst_01p)

n_10p = int(round(len(increases) / 10))  # Worst 10 percentile
if n_10p == 0: n_10p = 1
worst_10p = increases[-n_10p]
print_delta('worst 10%:', worst_10p)

print('increases that stopped early: %i' % stopped_early)

values_increase = []
values_val_loss = []
for node in increases:
    values_increase.append(node.get_val_loss_delta())
    values_val_loss.append(node.val_loss)
avg_increase = avg(values_increase)
avg_val_loss = avg(values_val_loss)
print('avg increase: %f' % avg_increase)
delta_ratio = 100.0 * avg_increase / avg_val_loss  # percentage of avg val loss
print('avg increase percentage: %f' % delta_ratio)

file_increase_deltas = "increase-deltas-%s.data" % args.token
append(file_increase_deltas, "%i %5.1f" % (args.stage, delta_ratio))
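# NOTE: the append() helper used above is not shown. A minimal sketch of what
# the call assumes (hypothetical): append one line of text to a file.
def append_sketch(filename, line):
    with open(filename, 'a') as f:
        f.write(line + '\n')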
Beispiel #41
0
def dist_average(c0, c1):
    return Vector.distance(avg(c0), avg(c1))
Beispiel #42
0
def average_distance(c0, c1):
    return avg(ClustComparers.distances(c0, c1))
Beispiel #43
0
def sum_of_squared_errors(self):
    normsq = lambda v: abs(Vector.norm(v, 1))
    clust_avg = lambda clst: (clst, avg(clst))
    clust_sqerr = lambda (clst, mu): sum(normsq(v - mu) for v in clst)
    return sum(it.imap(clust_sqerr, it.imap(clust_avg, self)))
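# NOTE: the tuple-unpacking lambda above only parses under Python 2. An
# equivalent sketch that also runs under Python 3, assuming the same avg()
# and Vector helpers (a hypothetical rewrite, not the original method):
def sum_of_squared_errors_sketch(clusters):
    total = 0.0
    for clst in clusters:
        mu = avg(clst)  # cluster centroid
        total += sum(abs(Vector.norm(v - mu, 1)) for v in clst)
    return total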
Beispiel #44
0
    def __init__(self, n):  # n is the tessellation number
        self.clear()
        self.n = n

        if n == 1:  #Hexagons
            shape = 'hex2'
            img_n = 0
            l1 = []
            s = 70.0 / 11
            h = s * 1.732 / 2
            dx = s * 1.5
            dy = h
            x0 = s / 2 - .4
            y0 = 0 - .3
            x = x0
            y = y0
            for i in range(4):
                l1.append((x, y))
                x += dx
                y += dy
                dy = -dy
            x = x0
            y = y0 + 2 * h
            for i in range(4):
                l1.append((x, y))
                x += dx
                y += dy
                dy = -dy
            x = x0
            y = y0 + 4 * h
            for i in range(2):
                l1.append((x, y))
                x += 2 * dx
            self.init(l1, img_n, shape)

        elif n == 2:  # Squares
            # square 1
            shape = 'sq1'
            img_n = 0
            l1 = []
            s = 5.33
            d = s * 2
            y = s / 2
            for r in range(2):
                x = s / 2
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += d
            y = s / 2 + s
            for r in range(2):
                x = s / 2 + s
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += d
            self.init(l1, img_n, shape)
            # square 2
            shape = 'sq2'
            img_n = 1
            l1 = []
            y = s / 2
            for r in range(2):
                x = s / 2 + s
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += d
            y = s / 2 + s
            for r in range(2):
                x = s / 2
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += d
            self.init(l1, img_n, shape)

        elif n == 3:  #Triangles
            shape = 'tri1'
            img_n = 0
            l1 = []
            s = 5.33
            d = s * 2
            y0 = s * .866 - 3
            y = y0
            for r in range(2):
                x = s
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += s * 1.732 * 2
            y = y0 + s * 1.732
            for r in range(1):
                x = 0
                for c in range(4):
                    l1.append((x, y))
                    x += d
                y += s * 1.732 * 2
            self.init(l1, img_n, shape)
            shape = 'tri2'
            img_n = 1
            l1 = []
            y = y0
            for r in range(2):
                x = 0
                for c in range(4):
                    l1.append((x, y))
                    x += d
                y += s * 1.732 * 2
            y = y0 + s * 1.732
            for r in range(1):
                x = s
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += s * 1.732 * 2
            self.init(l1, img_n, shape)

        elif n == 4:
            # dodecagons
            shape = 'dodec'
            img_n = 0
            l1 = ((0, 1.5), (16, 1.5), (32, 1.5), (8, 15.3), (24, 15.3))
            self.init(l1, img_n, shape)
            # hexagons
            shape = 'hex'
            img_n = 1
            l1 = ((8, 6), (24, 6), (0, 10.8), (16, 10.8), (32, 10.8),
                  (0, 19.8), (16, 19.8), (32, 19.8))
            self.init(l1, img_n, shape)
            # squares @ 30deg
            shape = 'sq30'
            img_n = 2
            l1 = ((12, 8.4), (28, 8.4), (4, 22.1), (20, 22.1))
            self.init(l1, img_n, shape)
            # squares @ -30deg
            shape = 'sq_30'
            img_n = 3
            l1 = ((4, 8.4), (20, 8.4), (12, 22.1), (28, 22.1))
            self.init(l1, img_n, shape)
            # squares
            shape = 'sq'
            img_n = 4
            l1 = ((8, 1.5), (24, 1.5), (0, 15.3), (16, 15.3), (32, 15.3))
            self.init(l1, img_n, shape)

        elif n == 5:  # Alhambra
            x0 = 2.9
            y0 = 2.4
            dx = 6.55
            dy = 6.55
            # red
            shape = 'red'
            img_n = 0
            l1 = []
            y = y0
            for r in range(2):
                x = x0
                for c in range(3):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            y = y0 + dy
            for r in range(2):
                x = x0 + dx
                for c in range(2):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            self.init(l1, img_n, shape)
            # yellow
            shape = 'yellow'
            img_n = 1
            l1 = []
            y = y0
            for r in range(2):
                x = x0 + dx
                for c in range(2):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            y = y0 + dy
            for r in range(2):
                x = x0
                for c in range(3):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            self.init(l1, img_n, shape)
            # blue
            shape = 'blue'
            img_n = 2
            l1 = []
            y = y0 + dy / 2
            for r in range(2):
                x = x0 + dx / 2
                for c in range(3):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            y = y0 - dy / 2
            for r in range(2):
                x = x0 + dx + dx / 2
                for c in range(2):
                    if r == 1 and c == 0: l1.append((x - 2 * dx, y), )
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            self.init(l1, img_n, shape)
            # green
            shape = 'green'
            img_n = 3
            l1 = []
            y = y0 + dy / 2
            for r in range(2):
                x = x0 - dx / 2
                for c in range(3):
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            y = y0 - dy / 2
            for r in range(2):
                x = x0 + dx - dx / 2
                for c in range(2):
                    if r == 1 and c == 1: l1.append((x + 2 * dx, y), )
                    l1.append((x, y))
                    x += 2 * dx
                y += 2 * dy
            self.init(l1, img_n, shape)

        elif n == 6:  # Hexagons & Triangles
            shape = 'hex3'
            img_n = 0
            l1 = []
            s = 5.333
            d = s * 2
            x0 = 0
            y0 = 0
            h = s * 1.732 / 2
            y = y0
            for r in range(2):
                x = x0 + s
                for c in range(3):
                    l1.append((x, y))
                    x += d
                y += h * 4
            x = x0 + 0
            y = y0 + h * 2
            for c in range(4):
                l1.append((x, y))
                x += d
            self.init(l1, img_n, shape)
            shape = 'tri3'
            img_n = 1
            l1 = []
            y = y0 + h / 2
            for r in range(2):
                x = x0
                for c in range(4):
                    l1.append((x, y))
                    x += d
                y += h * 4
            x = x0 + s
            y = y0 + h * 2.5
            for c in range(3):
                l1.append((x, y))
                x += d
            self.init(l1, img_n, shape)
            shape = 'tri4'
            img_n = 2
            l1 = []
            x = x0 + s
            y = y0 + h * 1.5
            for c in range(3):
                l1.append((x, y))
                x += d
            x = x0
            y = y0 + h * 3.5
            for c in range(4):
                l1.append((x, y))
                x += d
            self.init(l1, img_n, shape)

        elif n == 7:  # Hexagons & Triangles & Squares
            shape = 'hex6'
            img_n = 0
            l1 = []
            s = 4.96
            y0 = .55
            h = s * 1.732 / 2
            x0 = 16 - s - 2 * h
            y = y0
            dx = s + 2 * h
            dy = 3 * s + 2 * h
            for r in range(2):
                x = x0
                for c in range(3):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = x0 + s / 2 + h
            y = y0 + 1.5 * s + h
            l1 += ((x, y), (x + dx, y))
            self.init(l1, img_n, shape)
            c0 = l1[0]
            c3 = l1[3]
            c6 = l1[6]
            shape = 'sq6'
            img_n = 1
            l1 = []
            x = x0 + h + s / 2
            y = y0
            l1 = [(x, y), (x + dx, y)]
            x = x0
            y = y0 + 1.5 * s + h
            l1 += ((x, y), (x + dx, y), (x + 2 * dx, y))
            self.init(l1, img_n, shape)
            shape = 'sq7'
            img_n = 2
            (x, y1) = utils.avg(c0, c6)
            l1 = [(x, y1), (x + dx, y1), (x + 2 * dx, y1)]
            x -= dx / 2
            (t, y2) = utils.avg(c6, c3)
            l1 += [(x, y2), (x + dx, y2), (x + 2 * dx, y2)]
            self.init(l1, img_n, shape)
            shape = 'sq8'
            img_n = 3
            l1 = [(x, y1), (x + dx, y1), (x + 2 * dx, y1)]
            x += dx / 2
            l1 += [(x, y2), (x + dx, y2), (x + 2 * dx, y2)]
            self.init(l1, img_n, shape)
            shape = 'tri6'
            img_n = 4
            dy = y2 - y1
            x = x0 + h + s / 2
            y1 = y0 + s / 2 + h / 2
            l1 = [(x, y1), (x + dx, y1)]
            x -= dx / 2
            y = y1 + dy
            l1 += [(x, y), (x + dx, y), (x + 2 * dx, y)]
            self.init(l1, img_n, shape)
            shape = 'tri7'
            img_n = 5
            x = x0 + h + s / 2 - dx / 2
            y1 = y0 + s + h / 2
            l1 = [(x, y1), (x + dx, y1), (x + 2 * dx, y1)]
            x += dx / 2
            y = y1 + dy
            l1 += [(x, y), (x + dx, y)]
            self.init(l1, img_n, shape)

        elif n == 8:  # Octagons & Squares
            shape = 'oct'
            img_n = 0
            l1 = []
            y = 4
            for r in range(3):
                x = 4
                for c in range(4):
                    l1.append((x, y))
                    x += 8
                y += 8
            self.init(l1, img_n, shape)
            shape = 'sq9'
            img_n = 1
            l1 = []
            y = 0
            for r in range(3):
                x = 0
                for c in range(5):
                    l1.append((x, y))
                    x += 8
                y += 8
            self.init(l1, img_n, shape)

        elif n == 9:  # Dodecagons & Triangles
            shape = 'dodec9'
            img_n = 0
            l1 = []
            y = 4.68
            dx = 22.48
            dy = 12.88
            for r in range(2):
                x = 4.76
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = 16
            y = 4.68 - dy / 2
            for r in range(3):
                l1.append((x, y))
                y += dy
            self.init(l1, img_n, shape)

        elif n == 10:  # Squares & Triangles
            shape = 'tri10'
            img_n = 0
            l1 = []
            s = 5.856
            h = s * 1.732 / 2
            dx = 16
            dy = dx
            y = s / 2
            for r in range(2):
                x = h / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = h / 2 + dx / 2
            y = s / 2 + dy / 2
            for c in range(2):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)
            shape = 'sq10'
            img_n = 1
            l1 = []
            y = (1.5 * s + h) / 2
            for r in range(2):
                x = (.5 * s + h) / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            y = (1.5 * s + h) / 2 - dy / 2
            for r in range(2):
                x = (.5 * s + h) / 2 + dx / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            self.init(l1, img_n, shape)
            shape = 'sq11'
            img_n = 2
            l1 = []
            y = (1.5 * s + h) / 2 - dy / 2
            for r in range(2):
                x = (.5 * s + h) / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            y = (1.5 * s + h) / 2
            for r in range(2):
                x = (.5 * s + h) / 2 + dx / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            self.init(l1, img_n, shape)
            shape = 'tri11'
            img_n = 3
            l1 = []
            y = s / 2
            for r in range(2):
                x = 16 - h / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = s / 2 + h / 2
            y = s / 2 + dy / 2
            for c in range(2):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)
            shape = 'tri12'
            img_n = 4
            l1 = []
            y = s + h / 2 - dy / 2
            for r in range(2):
                x = h + s / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = 0
            y = s + h / 2
            for c in range(3):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)
            shape = 'tri13'
            img_n = 5
            l1 = []
            y = s / 2 + h / 2
            for r in range(2):
                x = h + s / 2
                for c in range(2):
                    l1.append((x, y))
                    x += dx
                y += dy
            x = 0
            y = s + h + h / 2
            for c in range(3):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)

        elif n == 11:  # Squares & Triangles
            shape = 'sq14'
            img_n = 0
            l1 = []
            s = 8
            h = s * 1.732 / 2
            x = s / 2
            y = s / 2
            dx = s
            dy = s + h
            for c in range(4):
                l1.append((x, y))
                x += dx
            x = 0
            y += dy
            for c in range(5):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)
            shape = 'tri15'
            img_n = 1
            l1 = []
            x = s / 2
            y = s + h / 2
            dx = s
            for c in range(4):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)
            shape = 'tri14'
            img_n = 2
            l1 = []
            x = 0
            y = s + h / 2
            dx = s
            for c in range(5):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)

        elif n == 12:  # Hexagons & Triangles
            s = 5.333
            h = s * 1.732 / 2
            shape = 'hex16'
            img_n = 0
            l1 = []
            l1.append((s / 2, h))
            l1.append((3 * s, 2 * h))
            l1.append((5 * s, 0))
            l1.append((s, 4 * h))
            l1.append((3.5 * s, 5 * h))
            l1.append((5.5 * s, 3 * h))
            self.init(l1, img_n, shape)
            shape = 'tri16'
            img_n = 1
            l1 = []
            dx = s
            x = 1.5 * s
            y = .5 * h
            for c in range(3):
                l1.append((x, y))
                x += dx
            x = 4 * s
            y = 1.5 * h
            for c in range(3):
                l1.append((x, y))
                x += dx
            l1.append((2 * s, y))
            x = s / 2
            y = 2.5 * h
            for c in range(2):
                l1.append((x, y))
                x += dx
            l1.append((4.5 * s, y))
            x = 2 * s
            y = 3.5 * h
            l1.append((0, y))
            for c in range(3):
                l1.append((x, y))
                x += dx
            x = 4.5 * s
            y = 4.5 * h
            l1.append((2.5 * s, y))
            for c in range(2):
                l1.append((x, y))
                x += dx
            self.init(l1, img_n, shape)

            shape = 'tri17'
            img_n = 2
            l1 = []
            x = 2 * s
            y = .5 * h
            for c in range(3):
                l1.append((x, y))
                x += dx
            l1.append((32, y))
            x = 4.5 * s
            y = 1.5 * h
            l1.append((1.5 * s, y))
            for c in range(2):
                l1.append((x, y))
                x += dx
            x = 0
            y = 2.5 * h
            for c in range(3):
                l1.append((x, y))
                x += dx
            l1.append((4 * s, y))
            x = 2.5 * s
            y = 3.5 * h
            for c in range(3):
                l1.append((x, y))
                x += dx
            y = 4.5 * h
            l1.append((0, y))
            l1.append((2 * s, y))
            l1.append((5 * s, y))
            l1.append((32, y))
            self.init(l1, img_n, shape)
        self.setup()
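# NOTE: the literal 1.732 used throughout the tessellation layouts above
# approximates sqrt(3); s * 1.732 / 2 is the height of an equilateral triangle
# of side s, which sets the row spacing. A sketch with the exact constant:
import math
s = 5.333
h = s * math.sqrt(3) / 2  # same role as the "h = s * 1.732 / 2" lines above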
Beispiel #45
0
    def evaluate(self, learner,
            partition='test', debug_idxs=None, skip_idxs=(), decoder='ilp',
            n_eval=(1,2,3,4), streaming=True, overwritten_params=(),
            eval_path=None, output_path=None, lm_proxy=None, **kwargs):
        """Run the transduction model on designated test instances and report
        performance metrics.
        """
        # When evaluating multiple iterations of the same model over a fixed
        # partition, decoding should ensure that initialization isn't
        # unnecessarily repeated.
        if learner is not None:
            eval_instances = self.decode_instances(learner,
                                                   partition=partition,
                                                   debug_idxs=debug_idxs,
                                                   skip_idxs=skip_idxs,
                                                   decoder=decoder,
                                                   streaming=streaming,
                                                   overwritten_params=\
                                                           overwritten_params,
                                                   **kwargs)
            system_name = learner.name
        else:
            eval_instances = self.get_instances(partition=partition,
                                                debug_idxs=debug_idxs,
                                                skip_idxs=skip_idxs)
            system_name = 'baseline'
        num_instances = len(eval_instances)

        # Record overwritten parameter values in the filenames
        overwriting_str = None
        if len(overwritten_params) > 0:
            overwriting_str = '_OW-'
            i = 0
            for param_name, value in overwritten_params.iteritems():
                if isinstance(value, list) or isinstance(value, tuple):
                    overwriting_str += '+'.join(str(v) for v in sorted(value))
                else:
                    overwriting_str += str(value)
                i += 1
                if i < len(overwritten_params):
                    overwriting_str += '-'

        if output_path is not None:
            output_filename = ''.join((output_path, '/',
                    '_'.join((partition, 'under', system_name)),
                    overwriting_str if overwriting_str is not None else '',
                    '_', decoder, '.out'))
            outf = open(output_filename, 'wb')

        # Determine the evaluations to run by looking at a representative
        # instance
        i = 0
        while i < len(eval_instances) and \
                not hasattr(eval_instances[i], 'output_sent'):
            i += 1
        if i == len(eval_instances):
            print "WARNING: all instances failed; skipping evaluation"
            sys.exit()
        some_instance = eval_instances[i]
        has_labels = hasattr(some_instance, 'label_sentences')
        has_rasp = hasattr(some_instance.gold_sentences[0], 'relgraph')
        has_outtrees = hasattr(some_instance.output_sent, 'outtree')
        has_outframes = hasattr(some_instance.output_sent, 'outframes')

        # NOTE: skip_failed must remain False for test evaluation; it may be
        # set to True only temporarily when debugging failed instances.
        skip_failed = False

        # Initialize the evaluations
        eval_obj = evaluation.Evaluation(title='TRANSDUCTION_EVAL')
        output_sents = []
        with timer.AvgTimer(num_instances):
            for i, instance in enumerate(eval_instances):
                sys.stdout.write("Evaluating " + str(num_instances) +
                        (" " + partition if partition is not None else "") +
                        " instances: " + str(i+1) + '\r')

                # Duration and failure status
                eval_obj.include(
                        system=system_name,
                        corpus='other',
                        decode_time=instance.decode_times[-1],
                        solution_time=instance.solution_times[-1] \
                                if len(instance.solution_times) > 0 else 0,
                        inputs=len(instance.input_sents),
                        _failed=int(not hasattr(instance, 'output_sent')),
                        )

                if skip_failed and not hasattr(instance, 'output_sent'):
                    print "WARNING: Skipping failed instance", instance.idx
                    continue

                # POS tag recall
                for use_labels in set([False]) | set([has_labels]):
                    for prefix in ('NN', 'VB', 'JJ', 'RB'):
                        p, r, f = instance.score_content_words(
                                use_labels=use_labels, prefixes=(prefix,))
                        eval_obj.add_metrics(
                                precision=p,
                                recall=r,
                                system=system_name,
                                corpus=('LBLs ' + prefix) if use_labels \
                                        else ('GOLD ' + prefix),
                                )

                try:
                    if lm_proxy is not None:
                        output_tokens = instance.output_sent.tokens \
                                if hasattr(instance, 'output_sent') else []
                        eval_obj.include(
                                system=system_name,
                                corpus='other',
                                lm=lm_proxy.score_sent(output_tokens)
                                )
                except jsonrpc.RPCTransportError:
                    print "ERROR: JSON-RPC hiccups; skipping LM scoring"
                    pass

                if decoder.startswith('dp+'):
                    # Record convergence of dual decomposition or
                    # bisection. Will be 0 if neither are used.
                    eval_obj.include(
                            system=system_name,
                            corpus='other',
                            convergence_=int(instance.converged),
                            iterations=instance.num_iterations,
                            )

                if len(instance.sentences) == 1:
                    # Paraphrasing or compression-specific metrics
                    eval_obj.include(
                            system=system_name,
                            corpus='STATS gold',
                            comp_=instance.get_gold_compression_rate(),
                            length=instance.avg_gold_len,
                            proj_=avg(int(gold_sent.dparse.is_projective())
                                for gold_sent in instance.gold_sentences),
                            overlap_=avg(instance.get_overlap(gold_sent)
                                for gold_sent in instance.gold_sentences),
                            )
                    eval_obj.include(
                            system=system_name,
                            corpus='STATS input',
                            comp_=1.0,
                            length=instance.avg_len,
                            proj_=int(
                                instance.sentences[0].dparse.is_projective()),
                            overlap_=instance.get_overlap(
                                instance.sentences[0])
                            )
                    eval_obj.include(
                            system=system_name,
                            corpus='STATS output',
                            comp_=instance.get_compression_rate(),
                            length=len(instance.output_sent.tokens)
                                    if hasattr(instance, 'output_sent') else 0,
                            )
                    if hasattr(instance, 'output_sent') and has_outtrees:
                        eval_obj.include(
                                system=system_name,
                                corpus='STATS output',
                                proj_=int(instance.output_sent.\
                                          outtree.is_projective())
                                      if hasattr(instance.output_sent.outtree,\
                                                 'is_projective')
                                      else 0,
                                overlap_=instance.get_overlap(
                                    instance.output_sent,
                                    parse_type='outtree')
                                )

#                    print "INSTANCE ", instance.idx
#                    crossing_edges = \
#                        instance.output_sent.outtree.get_crossing_edges()
#                    print "\n\nINPUT:",
#                    self.dump_parse(instance.sentences[0])
#
#                    for gs, gold_sent in enumerate(
#                            instance.gold_sentences):
#                        # get output indices for gold
#                        gold_idxs = []
#                        i = 0
#                        for token in gold_sent.tokens:
#                            while instance.sentences[0].tokens[i] != token:
#                                i += 1
#                            gold_idxs.append((0,i))
#
#                        print "\nGOLD:", gs,
#                        self.dump_parse(gold_sent,
#                            idx_mapper=gold_idxs)
#
#                    print "\n\nOUTPUT:",
#                    self.dump_parse(instance.output_sent,
#                            parse_type='outtree',
#                            crossing_edges=crossing_edges,
#                            idx_mapper=instance.output_idxs)

                # n-gram precision and recall
                for use_labels in set([False]) | set([has_labels]):
                    for n in n_eval:
                        p, r, f = instance.score_ngrams(n=n,
                                use_labels=use_labels)
                        eval_obj.add_metrics(
                                precision=p,
                                recall=r,
                                system=system_name,
                                corpus='LBLs n='+str(n) if use_labels else
                                       'GOLD n='+str(n),
                                )
                if hasattr(instance, 'output_sent') and has_outframes:
                    # Precision and recall for frames
                    p, r, f = instance.score_frames(fes=False,
                                                    frames_type='outframes',
                                                    use_labels=use_labels)
                    eval_obj.add_metrics(
                            precision=p,
                            recall=r,
                            system=system_name,
                            corpus="GOLD frames",
                            )

                    # Precision and recall for frame elements
                    p, r, f = instance.score_frames(fes=True,
                                                    frames_type='outframes',
                                                    use_labels=use_labels)
                    eval_obj.add_metrics(
                            precision=p,
                            recall=r,
                            system=system_name,
                            corpus="GOLD fes",
                            )

                # Parse output sentences for syntactic evaluation. The
                # 100 token limit is intended for the Stanford parser.
                if hasattr(instance, 'output_sent') and \
                        len(instance.output_sent.tokens) <= 100:
                    output_sents.append(instance.output_sent)

                # Write the output to a file
                if output_path is not None:
                    outf.write(instance.get_display_string())
#            print
            if output_path is not None:
                outf.close()

            # Parse-based evaluations
            try:
                parse_types = ['dparse']
                if has_outtrees:
                    parse_types.append('outtree')

                # Get annotations. Only run RASP if the inputs have RASP
                # annotations since it's slow
                annotations.annotate(output_sents, 'Stanford')
                if has_rasp:
                    annotations.annotate(output_sents, 'Rasp')
                    parse_types.append('relgraph')

                # Add dependency results to evaluations
                for i, instance in enumerate(eval_instances):
                    if skip_failed and not hasattr(instance, 'output_sent'):
                        print "WARNING: Skipping failed instance",
                        print instance.idx, "again"
                        continue

                    for parse_type in parse_types:
                        for use_labels in set([False]) | set([has_labels]):
                            name = ('LBLs ' if use_labels else 'GOLD ') + \
                                parse_type
                            p, r, f = instance.score_dependencies(
                                    parse_type=parse_type,
                                    use_labels=use_labels)
                            eval_obj.add_metrics(
                                    precision=p,
                                    recall=r,
                                    system=system_name,
                                    corpus=name,
                                    _failed=int(not instance.has_output_parses(
                                            parse_type=parse_type)))
            except OSError:
                print "Skipping parser evaluations"

        print eval_obj.title
        print eval_obj.table(skip_single_keys=True)
        if eval_path is not None and debug_idxs is None:
            eval_filename = ''.join((eval_path, '/',
                    '_'.join((partition, 'under', system_name)),
                    overwriting_str if overwriting_str is not None else '',
                    '_', decoder,
                    '.eval'))
            eval_obj.save(eval_filename, append=False)
Beispiel #46
0
def calc_avg_errors(output_folder):

    from parsers import CVOutputParser
    from utils import interpolate, avg
    import math
    from collections import Counter
    import os
    """ 
    Average error calculation on CV output.
    """
    if not output_folder[-1] == '/':
        output_folder += '/'
    
    # better_than_baseline_file = open('better_than_base_line.tsv', 'w')
    # better_than_baseline_file.write('est\tobs\tn1\tn2\tn3\tpair_trip_ratio\ts1\ts2\ts3\ts12\ts13\ts23\ts123\n')

    # small_error_file = open('small_error.tsv', 'w')
    # small_error_file.write('est\tobs\tn1\tn2\tn3\tpair_trip_ratio\ts1\ts2\ts3\ts12\ts13\ts23\ts123\n')    
    baseline = 88.5
    iteration = 0
    points_evaluated = 0
    over_estimates = 0
    all_sample_errors = []
    while True:
        tsv_file = output_folder + str(iteration) + '_data_zero_trips.tsv'

        if not os.path.exists(tsv_file):
            break

        sample_errors = []
        for (n1, n2, n3), (est, obs, ratio, triangle) in CVOutputParser.read_est_obs_file_disc_version_2(tsv_file):

            s1, s2, s3, s12, s13, s23, s123 = triangle

            # if int(obs) < 200 or s123 == 0:
            #     continue

            # Heuristic for extrapolation, assuming a sample of 200000 items
            # est = min(s12, s13, s23) / 200000. * (21006480-200000)

            points_evaluated += 1
            if est > obs:
                over_estimates += 1

            # if obs > baseline:
            #     if abs(est-obs) < abs(est-baseline):
            #         better_than_baseline_file.write(str(est) + '\t' + str(obs) + '\t' + str(n1) + '\t' + str(n2) + '\t' + str(n3) + '\t' + str(ratio) + '\t' + str(s1) + '\t' + str(s2) + '\t' + str(s3) + '\t' + str(s12) + '\t' + str(s13) + '\t' + str(s23) + '\t' + str(s123) + '\n')

            error = abs(est-obs) / math.sqrt(obs)
            # if error < 3:
            #     small_error_file.write(str(est) + '\t' + str(obs) + '\t' + str(n1) + '\t' + str(n2) + '\t' + str(n3) + '\t' + str(ratio) + '\t' + str(s1) + '\t' + str(s2) + '\t' + str(s3) + '\t' + str(s12) + '\t' + str(s13) + '\t' + str(s23) + '\t' + str(s123) + '\n')
            sample_errors.append(error)
        all_sample_errors.append(avg(sample_errors))
        iteration += 1

    # better_than_baseline_file.close()
    # small_error_file.close()

    avg_error = avg(all_sample_errors)
    print 'avg_error ', avg_error
    print 'points evaluated', points_evaluated
    print 'over estimates: ', over_estimates
    return avg_error, all_sample_errors
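# NOTE: the per-triple error in calc_avg_errors() above is |est - obs| /
# sqrt(obs): an absolute error scaled by the square root of the observed
# count (the Poisson standard deviation of a count with mean obs), not a
# percentage error. A minimal sketch of that metric:
import math

def scaled_error(est, obs):
    return abs(est - obs) / math.sqrt(obs)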