Example #1
    def mkhist(fname, xmin, xmax, delta, ihist):
        xdata = []
        if os.path.exists(fname + ".gz"):
            import gzip

            fp = gzip.open(fname + ".gz")
        else:
            fp = open(fname)
        for line in fp:
            time, x = map(float, line.strip().split()[:2])
            xdata.append(x)
        x = np.array(xdata)
        xbins = [xmin + i * delta for i in range(nbin + 1)]
        hist[ihist], edges = np.histogram(x, bins=xbins, range=(xmin, xmax))
        nb_data[ihist] = int(np.sum(hist[ihist, :]))

        print "statistics for timeseries # ", ihist
        print "minx:", "%8.3f" % np.min(x), "maxx:", "%8.3f" % np.max(x)
        print "average x", "%8.3f" % np.average(x), "rms x", "%8.3f" % np.std(x)
        print "statistics for histogram # ", ihist
        print int(np.sum(hist[ihist, :])), "points in the histogram"
        print "average x", "%8.3f" % (
            np.sum([hist[ihist, i] * (edges[i] + edges[i + 1]) / 2 for i in range(nbin)]) / np.sum(hist[ihist])
        )
        print

        var = (
            1.0
            / (nblock * (nblock - 1))
            * np.sum([np.average((x[k * (len(x) // nblock) : (k + 1) * (len(x) // nblock)] - np.average(x)) ** 2) for k in range(nblock)])
        )
        return var
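The closing variance estimate uses block averaging but leans on module-level globals (nblock, nbin, hist). A minimal self-contained sketch of the textbook block-averaging estimator for the variance of the mean, using block means over equal-length blocks (the function name and the default of 10 blocks are illustrative, not from the original script):

import numpy as np

def block_average_variance(x, nblock=10):
    """Estimate the variance of the mean of a correlated timeseries by
    splitting it into nblock equal blocks and using the spread of the
    block means."""
    blocksize = len(x) // nblock
    # drop any trailing points that do not fill a complete block
    block_means = np.array([np.average(x[k * blocksize:(k + 1) * blocksize])
                            for k in range(nblock)])
    overall_mean = np.average(x[:nblock * blocksize])
    return np.sum((block_means - overall_mean) ** 2) / (nblock * (nblock - 1))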
Example #2
def linearRegression(segmentedValues):
	print("Linear regression")
	#regression = LinearRegression()
	linRegress = dict()
	for key in segmentedValues.keys():
		x = [x[0] for x in segmentedValues[key]]
		y = [x[1] for x in segmentedValues[key]]
		mean = [float(np.average(x)),float(np.average(y))]
		valuesDict = dict()
		valuesDict['x'] = x
		valuesDict['y'] = y
		valuesFrame = pd.DataFrame(valuesDict)
		try:
			rlmRes = sm.rlm(formula = 'y ~ x', data=valuesFrame).fit()
		except ZeroDivisionError:
			#I have no idea why this occurs. A problem with statsmodel
			#Return None
			print("divide by zero :( ")
			return None
		#Calculate r2_score (unfortunately, rlm does not give this to us)
		x = np.array(x)
		y = np.array(y)
		#Get the predicted values of Y
		y_pred = x*rlmRes.params.x+rlmRes.params.Intercept
		score = r2_score(y, y_pred)
		#These should both be positive -- put in abs anyway
		slopeConfInterval = abs(float(rlmRes.params.x) - float(rlmRes.conf_int(.005)[0].x))
		intConfInterval = abs(float(rlmRes.params.Intercept) - float(rlmRes.conf_int(.005)[0].Intercept))
		#Slope, Intercept, R^2, num of values, confidenceIntervals, mean of cluster
		linRegress[key] = [rlmRes.params.x, rlmRes.params.Intercept, score, len(x), [slopeConfInterval, intConfInterval], mean]
		print("Key: "+str(key)+" Slope: "+str(rlmRes.params.x)+" Intercept: "+str(rlmRes.params.Intercept)+"R2 Score: "+str(score)+" Num vals: "+str(len(x))+" confidence: "+str(slopeConfInterval)+", "+str(intConfInterval)+" mean: "+str(mean))
	return linRegress
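Because rlm does not report a coefficient of determination, the example rebuilds the predictions and calls sklearn's r2_score. The same quantity by the textbook definition, as a small standalone sketch:

import numpy as np

def r_squared(y_true, y_pred):
    # R^2 = 1 - SS_res / SS_tot
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.average(y_true)) ** 2)
    return 1.0 - ss_res / ss_tot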
Example #3
  def testEncodeAdjacentPositions(self, verbose=False):
    repetitions = 100
    n = 999
    w = 25
    radius = 10
    minThreshold = 0.75
    avgThreshold = 0.90
    allOverlaps = np.empty(repetitions)

    for i in range(repetitions):
      overlaps = overlapsForRelativeAreas(n, w,
                                          np.array([i * 10, i * 10]), radius,
                                          dPosition=np.array([0, 1]),
                                          num=1)
      allOverlaps[i] = overlaps[0]

    self.assertGreater(np.min(allOverlaps), minThreshold)
    self.assertGreater(np.average(allOverlaps), avgThreshold)

    if verbose:
      print ("===== Adjacent positions overlap "
             "(n = {0}, w = {1}, radius = {2}) ===").format(n, w, radius)
      print "Max: {0}".format(np.max(allOverlaps))
      print "Min: {0}".format(np.min(allOverlaps))
      print "Average: {0}".format(np.average(allOverlaps))
Example #4
def kMeans(k, centres, data, error, return_cost = False):
    # centres (kx2)
    # data (Nx2)
    # error: epsilon
    m = np.array(centres, dtype=float)  # take a real copy so the caller's centres are not modified
    
    while(True):
        sets = [[] for i in range(k)]
        
        for point in data:
            # Calculate distance
            dist_sq = np.sum((point - m) ** 2, axis = 1)
            # Choose the nearest centre and add point into corresponding set
            sets[np.argmin(dist_sq)].append(point)
            
        temp_m = m.copy()
        for i in range(len(sets)):
            if sets[i] != []:
                temp_m[i] = (np.mean(sets[i], axis = 0)) # centroid
            
        temp_m = np.array(temp_m)
        changes = temp_m - m
        m = temp_m
        
        if((changes < error).all()):
            break
    
    if(return_cost):
        costs = []
        for i in range(len(sets)):
            costs.append(np.average(np.sqrt(np.sum((m[i] - sets[i]) ** 2, axis = 1))))
        cost = np.average(costs)
        return m, cost
    else:
        return m
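A quick way to exercise the function is with synthetic two-dimensional data; the blobs, seed and tolerance below are made up purely for illustration:

import numpy as np

# two well-separated blobs of 2-D points
rng = np.random.default_rng(0)
data = np.vstack([rng.normal(loc=(0, 0), scale=0.5, size=(100, 2)),
                  rng.normal(loc=(5, 5), scale=0.5, size=(100, 2))])
initial_centres = np.array([[0.0, 1.0], [4.0, 4.0]])

centres, cost = kMeans(2, initial_centres, data, error=1e-6, return_cost=True)
print(centres)   # should land near (0, 0) and (5, 5)
print(cost)      # average within-cluster distance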
Example #5
        def __call__(self,x,k=5):

            if k<2:
                raise Exception("Need k>1")
            if x.ndim != self.xtrain[0].ndim:
                raise Exception("Requested x and training set do not have the same number of dimension.")

            #Change basis
            x0 = np.dot(self.L_mat,x)

            #Get nearest neighbors
            dist, loc = self.transf_xtree.query(x0,k=k)
            #Protect div by zero
            dist = np.array([np.max([1e-15,d]) for d in dist])
            weight = 1.0/dist
            nearest_y = self.ytrain[loc]

            #Interpolate with weighted average
            if self.ytrain.ndim > 1:
                y_predict = np.array([np.average(y0,weights=weight) for y0 in nearest_y.T])
                testgood = all([test_good(y) for y in y_predict])
            elif self.ytrain.ndim==1:
                y_predict = np.average(nearest_y,weights=weight)
                testgood = test_good(y_predict)
            else:
                raise Exception('The dimension of y training data is weird')


            if not testgood:
                raise Exception('y prediction went wrong')

            return y_predict
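Stripped of the class machinery and the basis change, the interpolation step is plain inverse-distance weighting over the k nearest training points. A standalone sketch, assuming a one-dimensional ytrain (the function name and signature are illustrative):

import numpy as np
from scipy.spatial import cKDTree

def idw_predict(xtrain, ytrain, x, k=5):
    """Inverse-distance-weighted average of the k nearest neighbours."""
    tree = cKDTree(xtrain)
    dist, idx = tree.query(x, k=k)
    dist = np.maximum(dist, 1e-15)          # protect against division by zero
    return np.average(ytrain[idx], weights=1.0 / dist)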
Example #6
  def testEncodeUnrelatedAreas(self):
    """
    assert unrelated areas don't share bits
    (outside of chance collisions)
    """
    avgThreshold = 0.3

    maxThreshold = 0.12
    overlaps = overlapsForUnrelatedAreas(1499, 37, 5)
    self.assertLess(np.max(overlaps), maxThreshold)
    self.assertLess(np.average(overlaps), avgThreshold)

    maxThreshold = 0.12
    overlaps = overlapsForUnrelatedAreas(1499, 37, 10)
    self.assertLess(np.max(overlaps), maxThreshold)
    self.assertLess(np.average(overlaps), avgThreshold)

    maxThreshold = 0.17
    overlaps = overlapsForUnrelatedAreas(999, 25, 10)
    self.assertLess(np.max(overlaps), maxThreshold)
    self.assertLess(np.average(overlaps), avgThreshold)

    maxThreshold = 0.25
    overlaps = overlapsForUnrelatedAreas(499, 13, 10)
    self.assertLess(np.max(overlaps), maxThreshold)
    self.assertLess(np.average(overlaps), avgThreshold)
Example #7
def dist_avg_closest_pair(feats1,feats2,alpha=10):
    """
    Distance measure between two sets of fingerprint maxes.
    Each feats argument is a 2xN matrix:
    first row - time in seconds, usually starting from the beat
    second row - frequency, usually a row index
    Computes the euclidean distance from each point in feats1 to its closest
    point in feats2, does the same in reverse, and keeps the smaller average.
    alpha is a multiplier applied to the seconds.
    """
    # special cases with no maxes
    if feats1.shape[1] == 0 and feats2.shape[1] == 0:
        return 0
    if feats1.shape[1] == 0 and feats2.shape[1] > 0:
        return np.inf # we'll find better
        #return 250. / 100 * feats2.shape[1]
    if feats1.shape[1] > 0 and feats2.shape[1] == 0:
        return np.inf # we'll find better
        #return 250. / 100 * feats1.shape[1]
    # compute distance from each of the points in a N x M matrix
    distmat = np.zeros([feats1.shape[1],feats2.shape[1]])
    for l in range(distmat.shape[0]):
        for c in range(distmat.shape[1]):
            distmat[l,c] = math.hypot(alpha*(feats1[0,l]-feats2[0,c]),
                                      feats1[1,l]-feats2[1,c])
    # measure closest ones
    shortest_from_feats1 = map(lambda x: np.min(distmat[x,:]),range(feats1.shape[1]))
    shortest_from_feats2 = map(lambda x: np.min(distmat[:,x]),range(feats2.shape[1]))
    # keep the smaller of the two average distances
    return np.min([np.average(shortest_from_feats1),
                   np.average(shortest_from_feats2)])
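The double loop that fills distmat can be collapsed into a single scipy call. A sketch of an equivalent vectorised measure, assuming scipy is available (this is an alternative formulation, not the original implementation):

import numpy as np
from scipy.spatial.distance import cdist

def dist_avg_closest_pair_vec(feats1, feats2, alpha=10):
    if feats1.shape[1] == 0 or feats2.shape[1] == 0:
        return 0 if feats1.shape[1] == feats2.shape[1] else np.inf
    # scale the time row by alpha, then compute all pairwise euclidean distances
    a = np.column_stack((alpha * feats1[0], feats1[1]))
    b = np.column_stack((alpha * feats2[0], feats2[1]))
    distmat = cdist(a, b)
    return np.min([np.average(distmat.min(axis=1)),
                   np.average(distmat.min(axis=0))])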
Example #8
def average_data(data):
    """
    Find mean and std. deviation of data returned by ``simulate``.
    """
    numnodes = data['nodes']
    its = data['its']
    its_mean = numpy.average(its)
    its_std = math.sqrt(numpy.var(its))
    dead = data['dead']
    dead_mean = 100.0*numpy.average(dead)/numnodes
    dead_std = 100.0*math.sqrt(numpy.var(dead))/numnodes
    immune = data['immune']
    immune_mean = 100.0*numpy.average(immune)/numnodes
    immune_std = 100.0*math.sqrt(numpy.var(immune))/numnodes
    max_contam = data['max_contam']
    max_contam_mean = 100.0*numpy.average(max_contam)/numnodes
    max_contam_std = 100.0*math.sqrt(numpy.var(max_contam))/numnodes
    normal = data['normal']
    normal_mean = 100.0*numpy.average(normal)/numnodes
    normal_std = 100.0*math.sqrt(numpy.var(normal))/numnodes
    return {'its': (its_mean, its_std),
            'nodes': numnodes,
            'dead': (dead_mean, dead_std),
            'immune': (immune_mean, immune_std),
            'max_contam': (max_contam_mean, max_contam_std),
            'normal': (normal_mean, normal_std)}
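The repeated mean/standard-deviation pattern can be folded into a small helper; a sketch under the assumption that the percentage scaling applies to everything except the iteration counts:

import math
import numpy

def mean_std(values, scale=1.0):
    """Return (mean, std) of values, both multiplied by scale."""
    return scale * numpy.average(values), scale * math.sqrt(numpy.var(values))

def average_data_compact(data):
    numnodes = data['nodes']
    pct = 100.0 / numnodes
    return {'its': mean_std(data['its']),
            'nodes': numnodes,
            'dead': mean_std(data['dead'], pct),
            'immune': mean_std(data['immune'], pct),
            'max_contam': mean_std(data['max_contam'], pct),
            'normal': mean_std(data['normal'], pct)}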
Example #9
    def getDiameterQuantilesAlongSinglePath(self,path,G,counter=None):
        
        G=self.filterPathDiameters(path, G,self.__scale)
        x=[]
        y=[]
        length=0
        vprop=G.vertex_properties["vp"]
        for i in path:
            length+=1
            if vprop[i]['diameter'] > 0:
                x.append(length)
                y.append(vprop[i]['diameter'])
        coeffs=polyfit(x,y,1)

        besty =  polyval ( coeffs ,    x)
        
        self.__io.saveArray(x,self.__io.getHomePath()+'Plots/'+self.__io.getFileName()+'_DiameterX')
        self.__io.saveArray(y,self.__io.getHomePath()+'Plots/'+self.__io.getFileName()+'_DiameterY')

        l=len(y)-1
        l25=int(l*0.25)
        l50=int(l*0.5)
        l75=int(l*0.75)
        l90=int(l*0.90)
        
        d25=np.average(y[:l25])
        d50=np.average(y[l25:l50])
        d75=np.average(y[l50:l75])
        d90=np.average(y[l90:])

        
        self.__io.saveArray(y,self.__io.getHomePath()+'Plots/'+self.__io.getFileName()+'_DiameterHistoTP')
        
        return d25,d50,d75,d90
Example #10
 def get_reference_pt(self):
     # The reference point for a compound object is the average of all
     # its constituents' reference points
     points = numpy.array([ obj.get_reference_pt() for obj in self.objects ])
     t_ = points.T
     x, y = numpy.average(t_[0]), numpy.average(t_[1])
     return (x, y)
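numpy.average accepts an axis argument, so the transpose-and-unpack step can also be written as a single call over axis 0; a tiny illustration with made-up points:

import numpy

points = numpy.array([[0.0, 0.0], [2.0, 0.0], [1.0, 3.0]])   # illustrative points
x, y = numpy.average(points, axis=0)                          # same as averaging each column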
Example #11
def distance_matrics(vol1, vol2, voxelsize_mm):
    # crop data to reduce computation time
    crinfo = qmisc.crinfo_from_specific_data(vol1, CROP_MARGIN)
    logger.debug(str(crinfo) + ' m1 ' + str(np.max(vol1)) +
                 ' m2 ' + str(np.min(vol2)))
    logger.debug("crinfo " + str(crinfo))
    vol1 = qmisc.crop(vol1, crinfo)
    vol2 = qmisc.crop(vol2, crinfo)

    border1 = get_border(vol1)
    border2 = get_border(vol2)

    #pyed = py3DSeedEditor.py3DSeedEditor(vol1, contour=vol1)
    #pyed.show()
    b1dst = scipy.ndimage.morphology.distance_transform_edt(
        1 - border1,
        sampling=voxelsize_mm
    )

    dst_b1_to_b2 = border2 * b1dst
    #import ipdb; ipdb.set_trace() # BREAKPOINT
    #pyed = py3DSeedEditor.py3DSeedEditor(dst_b1_to_b2, contour=vol1)
    #pyed.show()
    #print np.nonzero(border1)
    # avgd = np.average(dst_b1_to_b2[np.nonzero(border2)])
    avgd = np.average(dst_b1_to_b2[border2])
    rmsd = np.average(dst_b1_to_b2[border2] ** 2)  # mean of squared distances (no square root applied)
    maxd = np.max(dst_b1_to_b2)

    return avgd, rmsd, maxd
Example #12
    def assign_nearest_nbh(self, query_doc):

        block_id, query_words, doc_words = query_doc
        query_vector = self.vectorize(query_words)
        doc_vector = self.vectorize(doc_words)
        #distance = emd(query_vector, doc_vector, self.distance_matrix)
        #return block_id, distance

        doc_indices = np.nonzero(doc_vector)[0]
        query_indices = np.nonzero(query_vector)[0]

        query_weights = [self.word_level_idf.get(q_i, 0) for q_i in query_indices]
        doc_weights = [self.word_level_idf.get(d_i, 0) for d_i in doc_indices]

        doc_centroid = np.average([self.embedding.model[self.reverse_vocab[i]] for i in doc_indices], axis=0,
                                  weights=doc_weights)
        query_centroid = np.average([self.embedding.model[self.reverse_vocab[i]] for i in query_indices], axis=0,
                                    weights=query_weights)

        # sklearn euclidean distances may not be a symmetric matrix, so taking
        # average of the two entries
        dist_arr = np.array([[(self.distance_matrix[w_i, q_j] + self.distance_matrix[q_j, w_i]) / 2
                              for w_i in doc_indices] for q_j in query_indices])

        label_assignment = np.argmin(dist_arr, axis=1)
        label_assignment = [(index, l) for index, l in enumerate(label_assignment)]

        distances = [dist_arr[(i,e)] * self.word_level_idf.get(query_indices[i], 1) for i, e in label_assignment]

        distance = (1 - self.alpha) * np.sum(distances) + \
                   self.alpha * sp.spatial.distance.cosine(doc_centroid,query_centroid)
        return block_id, distance
Example #13
    def _fitter_worker(self, tasks, coords, subset_coords, masses, subset_masses, rmsdmat, pbar_counter):
        '''
        Fitter RMSD Matrix calculator. See encore.confdistmatrix.RMSDMatrixGenerator._fitter_worker for details.
        '''

        if subset_coords is None:
            for i,j in trm_indeces(tasks[0],tasks[1]):
                coords[i] -= average(coords[i], axis=0, weights=masses)
                coords[j] -= average(coords[j], axis=0, weights=masses)
                weights = asarray(masses)/mean(masses)
                rmsdmat[(i+1)*i//2+j] = - rmsd(coords[i],coords[j],weights=weights)
                pbar_counter.value += 1
        else:
            for i,j in trm_indeces(tasks[0],tasks[1]):
                #masses = asarray(masses)/mean(masses)
                summasses = sum(masses)
                com_i = average(subset_coords[i], axis=0, weights=subset_masses)
                translated_i = coords[i] - com_i
                subset1_coords = subset_coords[i] - com_i
                com_j = average(subset_coords[j], axis=0, weights=subset_masses)
                translated_j = coords[j] - com_j
                subset2_coords = subset_coords[j] - com_j
                rotamat = rotation_matrix(subset1_coords, subset2_coords, subset_masses)[0]
                rotated_i = transpose(dot(rotamat, transpose(translated_i)))
                rmsdmat[(i+1)*i//2+j] = MinusRMSD(rotated_i.astype(float64), translated_j.astype(float64), coords[j].shape[0], masses, summasses)
                pbar_counter.value += 1
Example #14
def genstats():
    # returns a list of dictionaries whereas each dictionary contains averages
    global db
    averages = [ 
        # {
            # "reporter": "",
            # "util": "",
            # "time_stddev": "",
            # "time_avg": "",
            # "vertices_avg": "",
            # "edges_avg": "",
        # }, 
    ] # lists of averages

    for (reporter, util), value in db.iteritems():
        value = {k: filter(lambda x: not (x is False or x is None), v) for k, v in value.iteritems()}

        averages.append(
            {
            "reporter": reporter,
            "util": util,
            "time_stddev" : np.std(value["time"], dtype=np.float64),
            "time_avg" : np.average(value["time"]),
            "vertices_avg" : np.average(value["vertices"]) if reporter!="none" else 0,
            "edges_avg" : np.average(value["edges"] if reporter!="none" else 0),
            "timedout_count" : sum(value["timedout"])
        })

    return averages
Example #15
def AverageBar(indir='/Volumes/Documents/colbrydi/Documents/DirksWork/chamview/ChamB/'):
    tot = 0.0;
    R = np.array([0,0,0]);
    G = np.array([0,0,0]);
    B = np.array([0,0,0]);
    for root, dirs, filenames in os.walk(indir):
        filenames.sort()
        for f in filenames:
            if fnmatch.fnmatch(f,'0*.jpeg'):
                im = readim(os.path.join(root,f))
                sz = im.shape[0]
                #print(im.shape)
                r = np.zeros((sz,1))
                g = np.zeros((sz,1))
                b = np.zeros((sz,1))
                r[:,0] = np.average(im[:,:,0],1)
                g[:,0] = np.average(im[:,:,1],1)
                b[:,0] = np.average(im[:,:,2],1)
                if tot==0:
                    R = r
                    G = g
                    B = b
                else:
                    R = np.append(R, r, axis=1)
                    G = np.append(G, g, axis=1)
                    B = np.append(B, b, axis=1)
                tot=tot+1
    if tot==0:
        print('ERROR - No files found in '+indir)
        return '' 
    im3 = np.zeros((R.shape[0],R.shape[1], 3))
    im3[:,:,0] = R
    im3[:,:,1] = G
    im3[:,:,2] = B 
    return im3
Example #16
def processLanes (lane_ids_as_string, row, junction,isIncomingLane):
    #append an empty row if there are no lanes in this junction    
    if (lane_ids_as_string==""):
        appendEmptyValuesToRow(row)
        return
    edge_prios=[]
    edge_types=[]
    lane_lengths=[]
    lane_speeds=[]    
    lane_id_list= lane_ids_as_string.split(" ")    
    for l_id in lane_id_list:
        try:            
            lane= lane_table[l_id]
            edge= lane.getparent()
            if isIncomingLane:    
                edge_types.append( edge.get("type"))
                edge_prios.append(float(edge.get("priority")))
            lane_lengths.append(float(lane.get("length")))
            lane_speeds.append(float(lane.get("speed")))
        except:
            print ("error with lane_ids: '{}', l_id:'{}' junction_id:'{}'".format(lane_ids_as_string,
                   l_id, row[0]))
            raise
        
    row.append(np.average(lane_speeds))
    row.append(np.std(lane_speeds))
    row.append(np.average(lane_lengths))
    row.append(np.std(lane_lengths))
    if isIncomingLane:        
        row.append(edge_types)
        row.append(np.average(edge_prios))
    else:
        row.append(None)
        row.append(-1)
    row.append(len(lane_id_list))
Example #17
def tabular_td_lambda_offline(states, actions, generator_class, generator_args, l, alpha):
	gamma = 1
	rms_error = np.zeros(100)
	for i in range(100):
		values = {state: 0 for state in states}
		policies = {state: {action: 1.0/len(actions) for action in actions} for state in states}
		errors = []
		for j in range(10):
			episode_states = []
			rewards = []
			generator = generator_class(*generator_args)
			current_state = generator.state
			while True:
				action, next_state, reward = generator.step(policies, current_state)
				episode_states.append(current_state)
				rewards.append(reward)
				if next_state == None:
					break
				current_state = next_state
			# offline returns
			new_values = {state: values[state] for state in states}
			z = {state: 0 for state in states}
			for t, state in enumerate(episode_states):
				z[state] += 1
				if t < len(episode_states) - 1:
					delta = rewards[t]+gamma*values[episode_states[t+1]]-values[state]
				else:
					delta = rewards[t]-values[state]
				for state in states:
					new_values[state] += alpha*delta*z[state]
					z[state] *= (gamma*l)
			values = new_values
			errors.append(np.average([(values[state]-(state+1)/10.0+1)**2 for state in states])**0.5)
		rms_error[i] = np.average(errors)
	return np.average(rms_error)
Example #18
def tabular_td_n_online(states, actions, generator_class, generator_args, n, alpha):
	gamma = 1
	rms_error = np.zeros(100)
	for i in range(100):
		values = {state: 0 for state in states}
		policies = {state: {action: 1.0/len(actions) for action in actions} for state in states}
		errors = []
		for j in range(10):
			episode_states = []
			rewards = []
			generator = generator_class(*generator_args)
			current_state = generator.state
			while True:
				action, next_state, reward = generator.step(policies, current_state)
				episode_states.append(current_state)
				rewards.append(reward)
				if next_state == None:
					break
				current_state = next_state
			# online returns
			for t, state in enumerate(episode_states):
				returns = 0
				for t_s in range(n):
					if t+t_s < len(episode_states):
						returns += gamma**t_s*rewards[t+t_s]
				if t+n < len(episode_states):
					last_episode_value = values[episode_states[t+n]]
				else:
					last_episode_value = 0
				values[state] += alpha*(returns+last_episode_value-values[state])
			errors.append(np.average([(values[state]-(state+1)/10.0+1)**2 for state in states])**0.5)
		rms_error[i] = np.average(errors)
	return np.average(rms_error)
Example #19
def tabular_td_lambda_online_replacing_traces(states, actions, generator_class, generator_args, l, alpha):
	gamma = 1
	rms_error = np.zeros(100)
	for i in range(100):
		values = {state: 0 for state in states}
		policies = {state: {action: 1.0/len(actions) for action in actions} for state in states}
		errors = []
		for j in range(20):
			z = {state: 0 for state in states}
			generator = generator_class(*generator_args)
			current_state = generator.state
			while True:
				action, next_state, reward = generator.step(policies, current_state)
				z[current_state] = 1
				if next_state == None:
					delta = reward-values[current_state]
				else:
					delta = reward+gamma*values[next_state]-values[current_state]
				for state in states:
					values[state] += alpha*delta*z[state]
					z[state] *= (gamma*l)
				if next_state == None:
					break
				current_state = next_state
			errors.append(np.average([(values[state]-(state+1)/10.0+1)**2 for state in states])**0.5)
		rms_error[i] = np.average(errors)
	return np.average(rms_error)
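The three TD variants above all score themselves the same way: the root-mean-square error between the learned values and the analytic values (state + 1)/10 - 1 of the random walk. Pulling that into a helper makes the repetition explicit; this is a convenience sketch, not part of the original code:

import numpy as np

def rms_value_error(values, states):
    """Root-mean-square error against the analytic values (state + 1) / 10 - 1."""
    true_values = {state: (state + 1) / 10.0 - 1 for state in states}
    return np.average([(values[s] - true_values[s]) ** 2 for s in states]) ** 0.5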
Example #20
    def randomized_auto_const_bg(self, amount):
        """ Automatically determine background. Only consider a randomly
        chosen subset of the image.
        
        Parameters
        ----------
        amount : int
            Size of random sample that is considered for calculation of
            the background.
        """
        cols = [randint(0, self.shape[1] - 1) for _ in xrange(amount)]

        # pylint: disable=E1101,E1103
        data = self.astype(to_signed(self.dtype))
        # Subtract average value from every frequency channel.
        tmp = (data - np.average(self, 1).reshape(self.shape[0], 1))
        # Get standard deviation at every point of time.
        # Need to convert because otherwise this class's __getitem__
        # is used which assumes two-dimensionality.
        tmp = tmp[:, cols]
        sdevs = np.asarray(np.std(tmp, 0))

        # Get indices of values with lowest standard deviation.
        cand = sorted(xrange(amount), key=lambda y: sdevs[y])
        # Only consider the best 5 %.
        realcand = cand[:max(1, int(0.05 * len(cand)))]

        # Average the best 5 %
        bg = np.average(self[:, [cols[r] for r in realcand]], 1)

        return bg.reshape(self.shape[0], 1)
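The sort-then-slice selection of the quietest 5 % of sampled columns can also be done with np.argsort. A standalone sketch of the same logic outside the class, assuming a plain float array (the function name and signature are illustrative):

import numpy as np

def quietest_columns_background(data, cols, fraction=0.05):
    """Average the sampled columns whose standard deviation is lowest.

    data : 2-D array (channels x time); cols : sampled column indices.
    """
    # subtract the per-channel mean, then rank the sampled columns by their std
    sdevs = np.std(data[:, cols] - np.average(data, 1).reshape(-1, 1), 0)
    best = np.argsort(sdevs)[:max(1, int(fraction * len(cols)))]
    return np.average(data[:, [cols[r] for r in best]], 1).reshape(-1, 1)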
Example #21
 def calc_precision_recall_fmeasure(self):
     """ Computes Precision, Recall, F-measure and Support """
     
     #  precision, recall, F-measure and support for each class for a given thresholds
     for threshold in [10, 30, 50]:
         result = precision_recall_fscore_support(self.y_true, prediction_to_binary(self.y_pred, threshold))
         self.scores['Precision ' + str(threshold) + '%'] = result[0]
         self.scores['Recall ' + str(threshold) + '%'] = result[1]
         self.scores['F-score ' + str(threshold) + '%'] = result[2]
         self.scores['Support'] = result[3]
        
     # Computes precision-recall pairs for different probability thresholds
     self.precision, self.recall, self.thresholds = precision_recall_curve(self.y_true, self.y_pred)    
     #print "precision = " + str(precision)
     #print "recall = " + str(recall)
     #print "thresholds = " +  str(thresholds)
     
     # Compute the area under the precision-recall curve (average precision from prediction scores)
     self.scores['Precision-Recall AUC'] = average_precision_score(self.y_true, self.y_pred)    
     
     
     self.scores['Weighted Precision'] = average_precision_score(self.y_true, self.y_pred, average='weighted') # weighted average precision by support (the number of true instances for each label).
     self.scores['Average Recall'] = np.average(self.recall)
     self.scores['Average Threshold'] = np.average(self.thresholds)
     
     return
Example #22
def main():
    train = pd.DataFrame.from_csv('train.csv')
    places_index = train['place_id'].values

    places_loc_sqr_wei = []
    for i, place_id in enumerate(train['place_id'].unique()):
        if not i % 100:
            print(i)
        place_df = train.iloc[places_index == place_id]
        place_weights_acc_sqred = 1 / (place_df['accuracy'].values ** 2)

        places_loc_sqr_wei.append([place_id,
                                   np.average(place_df['x'].values, weights=place_weights_acc_sqred),
                                   np.std(place_df['x'].values),
                                   np.average(place_df['y'].values, weights=place_weights_acc_sqred),
                                   np.std(place_df['y'].values),
                                   np.average(np.log(place_df['accuracy'].values)),
                                   np.std(np.log(place_df['accuracy'].values)),
                                   place_df.shape[0]])

        # print(places_loc_sqr_wei[-1])
        # plt.hist2d(place_df['x'].values, place_df['y'].values, bins=100)
        # plt.show()
        plt.hist(np.log(place_df['accuracy'].values), bins=20)
        plt.show()
    places_loc_sqr_wei = np.array(places_loc_sqr_wei)
    column_names = ['x_mean', 'x_sd', 'y_mean', 'y_sd', 'accuracy_mean', 'accuracy_sd', 'n_persons']
    places_loc_sqr_wei = pd.DataFrame(data=places_loc_sqr_wei[:, 1:], index=places_loc_sqr_wei[:, 0],
                                      columns=column_names)

    now = str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
    places_loc_sqr_wei.to_csv('places_loc_sqr_weights_%s.csv' % now)
Example #23
def Bplot(data,label,ylabel,trueVal):
    fig = plt.figure(dpi=600)
    ax = plt.subplot(111)
    bp = plt.boxplot(data, notch=0, sym='o', vert=1, whis=1.5,patch_artist=True)
    plt.setp(bp['boxes'], color='black',linewidth=1.5,facecolor='darkkhaki')
    plt.setp(bp['whiskers'], color='black',linewidth=1.5)
    plt.setp(bp['caps'], color='black',linewidth=1.5)
    plt.setp(bp['medians'], color='darkgreen',linewidth=1.5)
    plt.setp(bp['fliers'], color='grey', marker='o')
    ax.axhline(y=trueVal,xmin=0,xmax=1,c="r",linewidth=2.0,zorder=0,linestyle='--')

    ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey',alpha=0.8)
    ax.set_axisbelow(True)
    ax.set_ylabel(ylabel,fontsize = 24)
#     ax.set_xlabel(r'Variability',fontsize = 24)
    for tick in ax.yaxis.get_major_ticks():
        tick.label.set_fontsize(18)
    ax.set_xticklabels(label,fontsize=18)
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
       
    for i in range(len(data)):
        med = bp['medians'][i]
        plt.plot([np.average(med.get_xdata())], [np.average(data[i])],color='r', marker='*', markeredgecolor='k',markersize=10,label="Mean")
    
    fig.tight_layout()
    savingTitle = ylabel.translate(None,'${}')
    fig.savefig(''.join(['Plots/',funcFolder,'/Boxplots/%s.eps'])%(savingTitle),format='eps')
Example #24
def read_midi_oifits(f,lam,dlam,phot=False):
	hdu=fits.open(f)
	w=hdu[3].data
	ww=w["EFF_WAVE"]
	ix=(ww>lam-dlam)&(ww<lam+dlam)

	v=hdu[4].data

	if phot:
		vv=v["CFLUX"]
		vv/=phot
		vv_noise=v["CFLUXERR"]
		vv_noise/=phot
	else:
		vv = v["VISAMP"]
		vv_noise = v["VISAMPERR"]

	vis = np.average(vv[:,ix],axis=1)
	## average noise and divide by sqrt(n) for sample average
	vis_noise = np.average(vv_noise[:,ix],axis=1)/np.sqrt(np.sum(ix))
	
	u=v["UCOORD"]
	v=v["VCOORD"]
	bl=np.sqrt(u**2+v**2)
	pa=np.rad2deg(np.arctan(u/v))
	return(bl,pa,u,v,vis,vis_noise)
Example #25
	def __call__(self,u,v,w,iteration):
		q = 4

		plt.cool()
		if self.x is None:
			ny = v.shape[1]
			nz = v.shape[0]
			self.x,self.y = np.meshgrid(range(ny),range(nz))
		x,y = self.x,self.y

		if self.iterations is None:
			self.iterations = self.sim.bulk_calc(getIteration())
		all_itr = self.iterations

		if self.xvar is None:
			class temp(sim_operation):
				def get_params(self):
					return ["u"]
				def __call__(self,u):
					return np.max(self.sim.ddx(u))

			self.xvar = self.sim.bulk_calc(temp())
		xvar_series = self.xvar

		min = np.min(xvar_series)
		max = np.max(xvar_series)
		if min <= 0:
			min = 0.000001
		if max <= min:
			max = 0.00001
	
		avgu = np.average(u,2)
		avgv = np.average(v,2)
		avgw = -np.average(w,2)
		xd = self.sim.ddx(u)
		xd2d = np.max(xd,2)
		xd1d = np.max(xd2d,1)

		plt.subplot(221)
		plt.imshow(avgu)
		plt.quiver(x[::q,::q],y[::q,::q],avgv[::q,::q],avgw[::q,::q])
		plt.title('Avg u')
		plt.axis("tight")

		plt.subplot(222)
		plt.imshow(xd2d)
		plt.title('Max x Variation (y-z)')
		plt.axis("tight")

		plt.subplot(223)
		plt.plot(xd1d)
		plt.title('Max x Variation (z)')
		plt.axis("tight")

		plt.subplot(224)
		plt.plot(all_itr,xvar_series, '--')
		plt.plot([iteration,iteration],[min,max])
		plt.semilogy()
		plt.title('Max x Variation (t)')
		plt.axis("tight")
Example #26
    def predict(self, xhat):

        res = []
        for xhat_ in xhat:

            source_d, target_d = self._getLocalDatapoints(self.data1, self.data2, self.topN, self.max_diff, xhat_)

            # Transform target data:
            #   Compute a difference array from the source and apply it to the target
            #   (local linear differences)
            source_d_diff = [s - xhat_ for s in source_d]
            target_data_transf = [t - s for t,s in zip(target_d, source_d_diff)]

            # Use transformed target data to compute expected RT in target domain (weighted average)
            # EXP = 0.65 # produces slightly better results
            EXP = 1.0
            expected_targ = numpy.average(target_data_transf, weights=[ 1/abs(s) if s > self.min_diff else self.min_diff for s in source_d_diff])
            expected_targ = numpy.average(target_data_transf, weights=[ 1/(abs(s)**EXP) if s > self.min_diff else self.min_diff for s in source_d_diff])

            # Compute a measurement of dispersion, standard deviation
            self.last_dispersion = numpy.std(target_data_transf)

            res.append( expected_targ )

        return res
Example #27
def find_surface_potentials(E0, E1, E2, E3, E4, d, r, s, x0, x4):
    """Checks the surface potentials at the wires and the plates.
    Finds values and also checks uniformity.
    E0, E1, E2, E3, E4, d, r, s have the same meaning as in W_3.
    x0 and x4 are locations far from the wires, e.g., a cathode and anode.
    Returns a tuple of two 6-element arrays.
    The first contains the voltages [V0, V1, V2, V3, V4, V5],
    where V1..V4 are the wire voltages and V0, V5 are the potentials at x0, x4.
    The second contains, for each surface, the spread (maximum minus minimum)
    of the potential found along it.
    """
    V = [0.0]*6
    dV = [0.0]*6
    #-- check planes at two key points
    for i,x in ( (0,x0), (5,x4) ):
        ztest = np.array([x+0j, x+d*0.5j])
        Varr = W_3(ztest, E0, E1, E2, E3, E4, d, r, s).imag
        V[i] = np.average(Varr)
        dV[i] = Varr.max()-Varr.min()
    #-- check planes at four key points
    for i in [1,2,3,4]:
        x = (i-2)*s
        ztest = np.array([x+r+0j, x+r*1j, x-r+0j, x-r*1j])
        Varr = W_3(ztest, E0, E1, E2, E3, E4, d, r, s).imag
        V[i] = np.average(Varr)
        dV[i] = Varr.max()-Varr.min()
    return np.array(V), np.array(dV)
Example #28
def summarize_sim_species(sim_df,obs_df):
    '''Returns a DataFrame giving, for each observation location, the
    recharge-weighted mean and standard deviation of travel time and
    concentration. THIS IS WHERE STATISTICS FURTHER DESCRIBING THE
    TRAVEL TIME AND SOLUTE DISTRIBUTIONS SHOULD/COULD BE COMPUTED.'''
    
    species_summary_df = pd.DataFrame(columns=['ObsName','MeanTravel','StdTravel','MeanConcentration','StdConcentration'])
    icount = 0
    for iname,igrp in sim_df.groupby('ObsName'):
        
        # Weight the travel times and concentrations
        itravel  = igrp['ThisTravelTime']
        ispecies = igrp['RechargeConc']
        iweights = igrp['RechargeRate']
        
        # Compute statistics
        imean_travel = np.average(itravel,weights=iweights)
        ivariance_travel = np.average((itravel-imean_travel)**2,weights=iweights)
        istd_travel = math.sqrt(ivariance_travel)
        
        imean_species = np.average(ispecies,weights=iweights)
        ivariance_species = np.average((ispecies-imean_species)**2,weights=iweights)
        istd_species = math.sqrt(ivariance_species)
        
        species_summary_df.loc[icount,:] =[iname,imean_travel,istd_travel,imean_species,istd_species]
        icount += 1
        
    species_summary_df = pd.merge(species_summary_df,obs_df,on='ObsName')
        
    return species_summary_df
Example #29
    def estimate(self):
        """ returns mean and variance """
        pos = self.particles[:, 0:2]
        mu = np.average(pos, weights=self.weights, axis=0)
        var = np.average((pos - mu)**2, weights=self.weights, axis=0)

        return mu, var
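estimate() is the standard weighted-mean / weighted-variance step of a particle filter. A quick standalone check of the same two np.average calls, with made-up particles and weights:

import numpy as np

particles = np.array([[0.0, 0.0], [1.0, 0.0], [2.0, 2.0]])
weights = np.array([0.2, 0.5, 0.3])          # normalised particle weights

mu = np.average(particles, weights=weights, axis=0)
var = np.average((particles - mu) ** 2, weights=weights, axis=0)
print(mu, var)   # weighted mean and variance per coordinate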
Example #30
def direction_var(values, weights):
  import numpy
  from scitbx import matrix
  weights = numpy.array(weights)
  valx = numpy.array([x for x, y, z in values])
  valy = numpy.array([y for x, y, z in values])
  valz = numpy.array([z for x, y, z in values])

  # Calculate average x, y, z
  avrx = numpy.average(valx, weights=weights)
  avry = numpy.average(valy, weights=weights)
  avrz = numpy.average(valz, weights=weights)

  # Calculate mean direction vector
  s1m = matrix.col((avrx, avry, avrz)).normalize()

  # Calculate angles between vectors
  angles = []
  for s in values:
    angles.append(s1m.angle(s))

  # Calculate variance of angles
  angles = numpy.array(angles)
  var = numpy.dot(weights, (angles)**2)/numpy.sum(weights)
  return var
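The last line is the weighted mean of the squared angles, which numpy.average computes directly when given a weights argument; a tiny self-contained check of the equivalence:

import numpy

w = numpy.array([1.0, 2.0, 3.0])
a = numpy.array([0.1, 0.2, 0.3])
# dot(w, a**2) / sum(w) is exactly the weighted average of a**2
assert numpy.isclose(numpy.dot(w, a ** 2) / numpy.sum(w),
                     numpy.average(a ** 2, weights=w))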
Example #31
def part1(img, choice):
    if choice == 0: # a binary image
        ret_img = np.where(img > 128, 255, 0)
        cv2.imwrite('./output/1.jpg', ret_img)
        
    elif choice == 1: # a histogram
        statistic = np.zeros(256)
        r, c = img.shape
        for i in range(r):
            for j in range(c):
                statistic[img[i,j]] += 1

        plt.style.use('seaborn-white')
        plt.bar(range(256) ,statistic)
        plt.xlabel('pixel value')
        plt.ylabel('number')
        plt.savefig('./output/2.jpg')
        
    elif choice == 2: # connected components
        row, col = img.shape
        ret_img = np.where(img > 128, 255, 0)
        ret_map = np.zeros_like(ret_img)
        label = 1
        # initialization of each 1-pixel to a unique label
        for r in range(row):
            for c in range(col):
                if ret_img[r,c] == 255:
                    ret_map[r,c] = label
                    label += 1                   
        # iteration of top-down followed by bottom-up passes
        change = True
        while change:
            change = False
            for r in range(row):
                for c in range(col):
                    if ret_map[r,c] != 0: 
                        for i in range(-1,2,1): 
                            if r+i >= row or r+i < 0:
                                continue
                            for j in range(-1,2,1):
                                if c+j >= col or c+j < 0:
                                    continue
                                if ret_map[r+i, c+j] != 0:
                                    if ret_map[r,c] > ret_map[r+i, c+j]:
                                        ret_map[r,c] = ret_map[r+i, c+j]
                                        change = True
            for r in range(row-1,-1,-1):
                for c in range(col-1,-1,-1):
                    if ret_map[r,c] != 0:
                        for i in range(-1,2,1): 
                            if r+i >= row or r+i < 0:
                                continue
                            for j in range(-1,2,1):
                                if c+j >= col or c+j < 0:
                                    continue
                                if ret_map[r+i, c+j] != 0:
                                    if ret_map[r,c] > ret_map[r+i, c+j]:
                                        ret_map[r,c] = ret_map[r+i, c+j]
                                        change = True
                                        
        centroid_map = dict()
        box_map = dict()
        for r in range(row):
            for c in range(col):
                if ret_map[r,c] != 0:
                    if ret_map[r,c] not in centroid_map:
                        centroid_map[ret_map[r,c]] = [(r,c)]
                        box_map[ret_map[r,c]] = {'u':r,'d':r,'l':c,'r':c}
                    else:
                        centroid_map[ret_map[r,c]].append((r,c))
                        box_map[ret_map[r,c]]['u'] = min(box_map[ret_map[r,c]]['u'], r)
                        box_map[ret_map[r,c]]['d'] = max(box_map[ret_map[r,c]]['d'], r)
                        box_map[ret_map[r,c]]['l'] = min(box_map[ret_map[r,c]]['l'], c)
                        box_map[ret_map[r,c]]['r'] = max(box_map[ret_map[r,c]]['r'], c)

        ret_img = cv2.cvtColor(ret_img.astype(np.uint8), cv2.COLOR_GRAY2BGR)
        for key in centroid_map:
            if len(centroid_map[key]) > 500:
                cv2.rectangle(ret_img, (box_map[key]['l'],box_map[key]['u']), (box_map[key]['r'],box_map[key]['d']), (0,255,0), 4)
                centroid_r = round(np.average([ x[0] for x in centroid_map[key]]))
                centroid_c = round(np.average([ x[1] for x in centroid_map[key]]))
                cv2.line(ret_img, (centroid_c-10,centroid_r), (centroid_c+10,centroid_r), (0, 0, 255), 5)
                cv2.line(ret_img, (centroid_c,centroid_r-10), (centroid_c,centroid_r+10), (0, 0, 255), 5)
                
        cv2.imwrite('./output/3.jpg',ret_img)

    else:
        print('invalid choice')
Example #32
 def get_average_power(self, data, is_running_file):
     # Power does not pertain to running files
     if is_running_file:
         return
     return np.average(data[:, 3])
Example #33
def pytorch_model_run_cv(x_train,
                         y_train,
                         features,
                         x_test,
                         model_obj,
                         feats=False,
                         clip=True):
    seed_everything()
    avg_losses_f = []
    avg_val_losses_f = []
    # matrix for the out-of-fold predictions
    train_preds = np.zeros((len(x_train)))
    # matrix for the predictions on the test set
    test_preds = np.zeros((len(x_test)))
    splits = list(
        StratifiedKFold(n_splits=n_splits, shuffle=True,
                        random_state=SEED).split(x_train, y_train))
    for i, (train_idx, valid_idx) in enumerate(splits):
        seed_everything(i * 1000 + i)
        x_train = np.array(x_train)
        y_train = np.array(y_train)
        if feats:
            features = np.array(features)
        x_train_fold = torch.tensor(x_train[train_idx.astype(int)],
                                    dtype=torch.long).cuda()
        y_train_fold = torch.tensor(y_train[train_idx.astype(int), np.newaxis],
                                    dtype=torch.float32).cuda()
        if feats:
            kfold_X_features = features[train_idx.astype(int)]
            kfold_X_valid_features = features[valid_idx.astype(int)]
        x_val_fold = torch.tensor(x_train[valid_idx.astype(int)],
                                  dtype=torch.long).cuda()
        y_val_fold = torch.tensor(y_train[valid_idx.astype(int), np.newaxis],
                                  dtype=torch.float32).cuda()

        model = copy.deepcopy(model_obj)

        model.cuda()

        loss_fn = torch.nn.BCEWithLogitsLoss(reduction='sum')
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                            model.parameters()),
                                     lr=0.001)

        ################################################################################################
        scheduler = False
        ###############################################################################################

        train = MyDataset(
            torch.utils.data.TensorDataset(x_train_fold, y_train_fold))
        valid = MyDataset(
            torch.utils.data.TensorDataset(x_val_fold, y_val_fold))

        train_loader = torch.utils.data.DataLoader(train,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid,
                                                   batch_size=batch_size,
                                                   shuffle=False)

        print(f'Fold {i + 1}')
        for epoch in range(n_epochs):
            start_time = time.time()
            model.train()

            avg_loss = 0.
            for i, (x_batch, y_batch, index) in enumerate(train_loader):
                if feats:
                    f = kfold_X_features[index]
                    y_pred = model([x_batch, f])
                else:
                    y_pred = model(x_batch)

                if scheduler:
                    scheduler.batch_step()

                # Compute and print loss.
                loss = loss_fn(y_pred, y_batch)
                optimizer.zero_grad()
                loss.backward()
                if clip:
                    nn.utils.clip_grad_norm_(model.parameters(), 1)
                optimizer.step()
                avg_loss += loss.item() / len(train_loader)

            model.eval()

            valid_preds_fold = np.zeros((x_val_fold.size(0)))
            test_preds_fold = np.zeros((len(x_test)))

            avg_val_loss = 0.
            for i, (x_batch, y_batch, index) in enumerate(valid_loader):
                if feats:
                    f = kfold_X_valid_features[index]
                    y_pred = model([x_batch, f]).detach()
                else:
                    y_pred = model(x_batch).detach()

                avg_val_loss += loss_fn(y_pred,
                                        y_batch).item() / len(valid_loader)
                valid_preds_fold[index] = sigmoid(y_pred.cpu().numpy())[:, 0]

            elapsed_time = time.time() - start_time
            print(
                'Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f} \t time={:.2f}s'
                .format(epoch + 1, n_epochs, avg_loss, avg_val_loss,
                        elapsed_time))
        avg_losses_f.append(avg_loss)
        avg_val_losses_f.append(avg_val_loss)
        # predict all samples in the test set batch per batch
        for i, (x_batch, ) in enumerate(test_loader):
            if feats:
                f = test_features[i * batch_size:(i + 1) * batch_size]
                y_pred = model([x_batch, f]).detach()
            else:
                y_pred = model(x_batch).detach()

            test_preds_fold[i * batch_size:(i + 1) * batch_size] = sigmoid(
                y_pred.cpu().numpy())[:, 0]

        train_preds[valid_idx] = valid_preds_fold
        test_preds += test_preds_fold / len(splits)

    print('All \t loss={:.4f} \t val_loss={:.4f} \t '.format(
        np.average(avg_losses_f), np.average(avg_val_losses_f)))
    return train_preds, test_preds
Example #34
 def get_average_speed(self, data):
     return np.average(data[:, 1])
Example #35
 def get_average_cadence(self, data):
     return np.average(data[:, 2])
Example #36
            def callback(theta_value):
                self.epoch += 1
                if (self.epoch) % validate_every == 0:
                    self.rnn.theta.set_value(theta_value, borrow=True)
                    # compute loss on training set
                    train_losses = [
                        compute_train_error(i, n_train)
                        for i in xrange(n_train_batches)
                    ]
                    train_batch_sizes = [
                        get_batch_size(i, n_train)
                        for i in xrange(n_train_batches)
                    ]

                    this_train_loss = np.average(train_losses,
                                                 weights=train_batch_sizes)

                    if compute_zero_one:
                        train_zero_one = [
                            compute_train_zo(i, n_train)
                            for i in xrange(n_train_batches)
                        ]

                        this_train_zero_one = np.average(
                            train_zero_one, weights=train_batch_sizes)

                    if self.interactive:
                        test_losses = [
                            compute_test_error(i, n_test)
                            for i in xrange(n_test_batches)
                        ]

                        test_batch_sizes = [
                            get_batch_size(i, n_test)
                            for i in xrange(n_test_batches)
                        ]

                        this_test_loss = np.average(test_losses,
                                                    weights=test_batch_sizes)

                        if compute_zero_one:
                            test_zero_one = [
                                compute_test_zo(i, n_test)
                                for i in xrange(n_test_batches)
                            ]

                            this_test_zero_one = np.average(
                                test_zero_one, weights=test_batch_sizes)

                        if compute_zero_one:
                            logger.info('epoch %i, tr loss %f, '
                                        'tr zo %f, te loss %f '
                                            'te zo %f' % \
                                        (self.epoch, this_train_loss,
                                         this_train_zero_one, this_test_loss,
                                         this_test_zero_one))
                        else:
                            logger.info('epoch %i, tr loss %f, te loss %f, '
                                        'lr: %f' % \
                                        (self.epoch, this_train_loss,
                                         this_test_loss, self.learning_rate))

                    else:
                        if compute_zero_one:
                            logger.info('epoch %i, train loss %f'
                                        ', train zo %f ' % \
                                        (self.epoch, this_train_loss,
                                         this_train_zero_one))
                        else:
                            logger.info('epoch %i, train loss %f ' % \
                                        (self.epoch, this_train_loss))

                    self.optional_output(train_set_x, show_norms, show_output)
Example #37
 def get_average_heart_rate(self, data):
     return np.average(data[:, 0])
Example #38
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)


x_batch = x_train[:3]
t_batch = t_train[:3]

print(t_batch)
print(x_batch.shape)

#gradient = backpropagation
#numerical_gradient = numerical differentiation
grad_numerical = network.numerical_gradient(x_batch,t_batch)
grad_backprop = network.gradient(x_batch,t_batch)

for key in grad_numerical.keys():
	diff = np.average(np.abs(grad_backprop[key] - grad_numerical[key]))
	print(key + ":" + str(diff))

	
	
### The neural network we implemented
import train_neuralnet


# computational graph
import numpy as np
import matplotlib.pyplot as plt

x=np.arange(len(train_neuralnet.train_acc_list))
plt.plot(x, train_neuralnet.train_acc_list)
plt.plot(x, train_neuralnet.test_acc_list)
Example #39
    def fit(self,
            X_train,
            Y_train,
            X_test=None,
            Y_test=None,
            validate_every=100,
            optimizer='sgd',
            compute_zero_one=False,
            show_norms=True,
            show_output=True):
        """ Fit model

        Pass in X_test, Y_test to compute test error and report during
        training.

        X_train : ndarray (T x n_in)
        Y_train : ndarray (T x n_out)

        validate_every : int
            in terms of number of epochs

        optimizer : string
            Optimizer type.
            Possible values:
                'sgd'  : batch stochastic gradient descent
                'cg'   : nonlinear conjugate gradient algorithm
                         (scipy.optimize.fmin_cg)
                'bfgs' : quasi-Newton method of Broyden, Fletcher, Goldfarb,
                         and Shanno (scipy.optimize.fmin_bfgs)
                'l_bfgs_b' : Limited-memory BFGS (scipy.optimize.fmin_l_bfgs_b)

        compute_zero_one : bool
            in the case of binary output, compute zero-one error in addition to
            cross-entropy error
        show_norms : bool
            Show L2 norms of individual parameter groups while training.
        show_output : bool
            Show the model output on first training case while training.
        """
        if X_test is not None:
            assert (Y_test is not None)
            self.interactive = True
            test_set_x, test_set_y = self.shared_dataset((X_test, Y_test))
        else:
            self.interactive = False

        train_set_x, train_set_y = self.shared_dataset((X_train, Y_train))

        if compute_zero_one:
            assert(self.output_type == 'binary' \
                   or self.output_type == 'softmax')
        # compute number of minibatches for training
        # note that cases are the second dimension, not the first
        n_train = train_set_x.get_value(borrow=True).shape[1]
        n_train_batches = int(np.ceil(1.0 * n_train / self.batch_size))
        if self.interactive:
            n_test = test_set_x.get_value(borrow=True).shape[1]
            n_test_batches = int(np.ceil(1.0 * n_test / self.batch_size))

        #validate_every is specified in terms of epochs
        validation_frequency = validate_every * n_train_batches

        ######################
        # BUILD ACTUAL MODEL #
        ######################
        logger.info('... building the model')

        index = T.lscalar('index')  # index to a [mini]batch
        n_ex = T.lscalar('n_ex')  # total number of examples
        # learning rate (may change)
        l_r = T.scalar('l_r', dtype=theano.config.floatX)
        mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

        cost = self.rnn.loss(self.y) \
            + self.L1_reg * self.rnn.L1 \
            + self.L2_reg * self.rnn.L2_sqr

        # Proper implementation of variable-batch size evaluation
        # Note that classifier.errors() returns the mean error
        # But the last batch may be a smaller size
        # So we keep around the effective_batch_size (whose last element may
        # be smaller than the rest)
        # And weight the reported error by the batch_size when we average
        # Also, by keeping batch_start and batch_stop as symbolic variables,
        # we make the theano function easier to read
        batch_start = index * self.batch_size
        batch_stop = T.minimum(n_ex, (index + 1) * self.batch_size)
        effective_batch_size = batch_stop - batch_start

        get_batch_size = theano.function(inputs=[index, n_ex],
                                         outputs=effective_batch_size)

        compute_train_error = theano.function(
            inputs=[index, n_ex],
            outputs=self.rnn.loss(self.y),
            givens={
                self.x: train_set_x[:, batch_start:batch_stop],
                self.y: train_set_y[:, batch_start:batch_stop]
            },
            mode=mode)

        if compute_zero_one:
            compute_train_zo = theano.function(
                inputs=[index, n_ex],
                outputs=self.rnn.errors(self.y),
                givens={
                    self.x: train_set_x[:, batch_start:batch_stop],
                    self.y: train_set_y[:, batch_start:batch_stop]
                },
                mode=mode)

        if self.interactive:
            compute_test_error = theano.function(
                inputs=[index, n_ex],
                outputs=self.rnn.loss(self.y),
                givens={
                    self.x: test_set_x[:, batch_start:batch_stop],
                    self.y: test_set_y[:, batch_start:batch_stop]
                },
                mode=mode)

            if compute_zero_one:
                compute_test_zo = theano.function(
                    inputs=[index, n_ex],
                    outputs=self.rnn.errors(self.y),
                    givens={
                        self.x: test_set_x[:, batch_start:batch_stop],
                        self.y: test_set_y[:, batch_start:batch_stop]
                    },
                    mode=mode)

        self.get_norms = {}
        for param in self.rnn.params:
            self.get_norms[param] = theano.function(
                inputs=[], outputs=self.rnn.l2_norms[param], mode=mode)

        # compute the gradient of cost with respect to theta using BPTT
        gtheta = T.grad(cost, self.rnn.theta)

        if optimizer == 'sgd':

            updates = {}
            theta = self.rnn.theta
            theta_update = self.rnn.theta_update
            # careful here, update to the shared variable
            # cannot depend on an updated other shared variable
            # since updates happen in parallel
            # so we need to be explicit
            upd = mom * theta_update - l_r * gtheta
            updates[theta_update] = upd
            updates[theta] = theta + upd

            # compiling a Theano function `train_model` that returns the
            # cost, but in the same time updates the parameter of the
            # model based on the rules defined in `updates`
            train_model = theano.function(
                inputs=[index, n_ex, l_r, mom],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[:, batch_start:batch_stop],
                    self.y: train_set_y[:, batch_start:batch_stop]
                },
                mode=mode)

            ###############
            # TRAIN MODEL #
            ###############
            logger.info('... training')
            epoch = 0

            while (epoch < self.n_epochs):
                epoch = epoch + 1
                effective_momentum = self.final_momentum \
                                     if epoch > self.momentum_switchover \
                                     else self.initial_momentum

                for minibatch_idx in xrange(n_train_batches):
                    minibatch_avg_cost = train_model(minibatch_idx, n_train,
                                                     self.learning_rate,
                                                     effective_momentum)

                    # iteration number (how many weight updates have we made?)
                    # epoch is 1-based, index is 0 based
                    iter = (epoch - 1) * n_train_batches + minibatch_idx + 1

                    if iter % validation_frequency == 0:
                        # compute loss on training set
                        train_losses = [
                            compute_train_error(i, n_train)
                            for i in xrange(n_train_batches)
                        ]
                        train_batch_sizes = [
                            get_batch_size(i, n_train)
                            for i in xrange(n_train_batches)
                        ]

                        this_train_loss = np.average(train_losses,
                                                     weights=train_batch_sizes)

                        if compute_zero_one:
                            train_zero_one = [
                                compute_train_zo(i, n_train)
                                for i in xrange(n_train_batches)
                            ]

                            this_train_zero_one = np.average(
                                train_zero_one, weights=train_batch_sizes)

                        if self.interactive:
                            test_losses = [
                                compute_test_error(i, n_test)
                                for i in xrange(n_test_batches)
                            ]

                            test_batch_sizes = [
                                get_batch_size(i, n_test)
                                for i in xrange(n_test_batches)
                            ]

                            this_test_loss = np.average(
                                test_losses, weights=test_batch_sizes)

                            if compute_zero_one:
                                test_zero_one = [
                                    compute_test_zo(i, n_test)
                                    for i in xrange(n_test_batches)
                                ]

                                this_test_zero_one = np.average(
                                    test_zero_one, weights=test_batch_sizes)

                            if compute_zero_one:
                                logger.info('epoch %i, mb %i/%i, tr loss %f, '
                                            'tr zo %f, te loss %f '
                                            'te zo %f lr: %f' % \
                                        (epoch, minibatch_idx + 1,
                                         n_train_batches,
                                         this_train_loss, this_train_zero_one,
                                         this_test_loss, this_test_zero_one,
                                         self.learning_rate))
                            else:
                                logger.info('epoch %i, mb %i/%i, tr loss %f '
                                            'te loss %f lr: %f' % \
                                (epoch, minibatch_idx + 1, n_train_batches,
                                 this_train_loss, this_test_loss,
                                 self.learning_rate))

                        else:
                            if compute_zero_one:
                                logger.info(
                                    'epoch %i, mb %i/%i, train loss %f'
                                    ' train zo %f '
                                    'lr: %f' %
                                    (epoch, minibatch_idx + 1, n_train_batches,
                                     this_train_loss, this_train_zero_one,
                                     self.learning_rate))
                            else:
                                logger.info(
                                    'epoch %i, mb %i/%i, train loss %f'
                                    ' lr: %f' %
                                    (epoch, minibatch_idx + 1, n_train_batches,
                                     this_train_loss, self.learning_rate))

                        self.optional_output(train_set_x, show_norms,
                                             show_output)

                self.learning_rate *= self.learning_rate_decay

                if self.snapshot_every is not None:
                    if (epoch + 1) % self.snapshot_every == 0:
                        date_obj = datetime.datetime.now()
                        date_str = date_obj.strftime('%Y-%m-%d-%H:%M:%S')
                        class_name = self.__class__.__name__
                        fname = '%s.%s-snapshot-%d.pkl' % (class_name,
                                                           date_str, epoch + 1)
                        fabspath = os.path.join(self.snapshot_path, fname)
                        self.save(fpath=fabspath)

        elif optimizer == 'cg' or optimizer == 'bfgs' \
                 or optimizer == 'l_bfgs_b':
            # compile a theano function that returns the cost of a minibatch
            batch_cost = theano.function(
                inputs=[index, n_ex],
                outputs=cost,
                givens={
                    self.x: train_set_x[:, batch_start:batch_stop],
                    self.y: train_set_y[:, batch_start:batch_stop]
                },
                mode=mode,
                name="batch_cost")

            # compile a theano function that returns the gradient of the
            # minibatch with respect to theta
            batch_grad = theano.function(
                inputs=[index, n_ex],
                outputs=T.grad(cost, self.rnn.theta),
                givens={
                    self.x: train_set_x[:, batch_start:batch_stop],
                    self.y: train_set_y[:, batch_start:batch_stop]
                },
                mode=mode,
                name="batch_grad")

            # creates a function that computes the average cost on the training
            # set
            def train_fn(theta_value):
                self.rnn.theta.set_value(theta_value, borrow=True)
                train_losses = [
                    batch_cost(i, n_train) for i in xrange(n_train_batches)
                ]
                train_batch_sizes = [
                    get_batch_size(i, n_train) for i in xrange(n_train_batches)
                ]
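                # weighting by batch size keeps a smaller final minibatch from skewing the mean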
                return np.average(train_losses, weights=train_batch_sizes)

            # creates a function that computes the average gradient of cost
            # with respect to theta
            def train_fn_grad(theta_value):
                self.rnn.theta.set_value(theta_value, borrow=True)

                train_grads = [
                    batch_grad(i, n_train) for i in xrange(n_train_batches)
                ]
                train_batch_sizes = [
                    get_batch_size(i, n_train) for i in xrange(n_train_batches)
                ]

                return np.average(train_grads,
                                  weights=train_batch_sizes,
                                  axis=0)

            # validation function, prints useful output after each iteration
            def callback(theta_value):
                self.epoch += 1
                if (self.epoch) % validate_every == 0:
                    self.rnn.theta.set_value(theta_value, borrow=True)
                    # compute loss on training set
                    train_losses = [
                        compute_train_error(i, n_train)
                        for i in xrange(n_train_batches)
                    ]
                    train_batch_sizes = [
                        get_batch_size(i, n_train)
                        for i in xrange(n_train_batches)
                    ]

                    this_train_loss = np.average(train_losses,
                                                 weights=train_batch_sizes)

                    if compute_zero_one:
                        train_zero_one = [
                            compute_train_zo(i, n_train)
                            for i in xrange(n_train_batches)
                        ]

                        this_train_zero_one = np.average(
                            train_zero_one, weights=train_batch_sizes)

                    if self.interactive:
                        test_losses = [
                            compute_test_error(i, n_test)
                            for i in xrange(n_test_batches)
                        ]

                        test_batch_sizes = [
                            get_batch_size(i, n_test)
                            for i in xrange(n_test_batches)
                        ]

                        this_test_loss = np.average(test_losses,
                                                    weights=test_batch_sizes)

                        if compute_zero_one:
                            test_zero_one = [
                                compute_test_zo(i, n_test)
                                for i in xrange(n_test_batches)
                            ]

                            this_test_zero_one = np.average(
                                test_zero_one, weights=test_batch_sizes)

                        if compute_zero_one:
                            logger.info('epoch %i, tr loss %f, '
                                        'tr zo %f, te loss %f '
                                            'te zo %f' % \
                                        (self.epoch, this_train_loss,
                                         this_train_zero_one, this_test_loss,
                                         this_test_zero_one))
                        else:
                            logger.info('epoch %i, tr loss %f, te loss %f' % \
                                        (self.epoch, this_train_loss,
                                         this_test_loss))

                    else:
                        if compute_zero_one:
                            logger.info('epoch %i, train loss %f'
                                        ', train zo %f ' % \
                                        (self.epoch, this_train_loss,
                                         this_train_zero_one))
                        else:
                            logger.info('epoch %i, train loss %f ' % \
                                        (self.epoch, this_train_loss))

                    self.optional_output(train_set_x, show_norms, show_output)

            ###############
            # TRAIN MODEL #
            ###############
            logger.info('... training')
            # using scipy conjugate gradient optimizer
            import scipy.optimize
            if optimizer == 'cg':
                of = scipy.optimize.fmin_cg
            elif optimizer == 'bfgs':
                of = scipy.optimize.fmin_bfgs
            elif optimizer == 'l_bfgs_b':
                of = scipy.optimize.fmin_l_bfgs_b
            logger.info("Optimizing using %s..." % of.__name__)
            start_time = time.clock()

            # keep track of epochs externally
            # these get updated through callback
            self.epoch = 0

            # interface to l_bfgs_b is different than that of cg, bfgs
            # however, this will be changed in scipy 0.11
            # unified under scipy.optimize.minimize
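            # A hedged sketch of that unified interface (scipy >= 0.11; not used here):
            #   res = scipy.optimize.minimize(
            #       fun=train_fn, x0=self.rnn.theta.get_value(), jac=train_fn_grad,
            #       method=optimizer.replace('_', '-').upper(), callback=callback,
            #       options={'maxiter': self.n_epochs})
            #   best_theta = res.x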
            if optimizer == 'cg' or optimizer == 'bfgs':
                best_theta = of(
                    f=train_fn,
                    x0=self.rnn.theta.get_value(),
                    # x0=np.zeros(self.rnn.theta.get_value().shape,
                    #             dtype=theano.config.floatX),
                    fprime=train_fn_grad,
                    callback=callback,
                    disp=1,
                    retall=1,
                    maxiter=self.n_epochs)
            elif optimizer == 'l_bfgs_b':
                best_theta, f_best_theta, info = of(
                    func=train_fn,
                    x0=self.rnn.theta.get_value(),
                    fprime=train_fn_grad,
                    iprint=validate_every,
                    maxfun=self.n_epochs)  # max number of feval

            end_time = time.clock()

            print "Optimization time: %f" % (end_time - start_time)

        else:
            raise NotImplementedError
Ejemplo n.º 40
0
    def camera_loop(self):
        global fig, ax
        global cameraOpen

        neuralModel = NeuralNetworkModel()

        # load the trained model parameters; the file handle is closed automatically
        with open("ModelStorage/nnlivetest.pickle", "rb") as neuralModelPickle:
            neuralModelDict = pickle.load(neuralModelPickle)

        video_capture = cv2.VideoCapture(0)
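        # cv2.VideoCapture(0) opens the default webcam; read() in the loop below
        # returns a (success_flag, frame) pair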

        frame_count = 0
        openness_average = []
        openness_min = 1
        openness_max = 0

        extraversion_average = []
        extraversion_min = 1
        extraversion_max = 0

        neuroticism_average = []
        neuroticism_min = 1
        neuroticism_max = 0

        agreeableness_average = []
        agreeableness_min = 1
        agreeableness_max = 0

        conscientiousness_average = []
        conscientiousness_min = 1
        conscientiousness_max = 0

        while cameraOpen:
            ret, frame = video_capture.read()

            font = cv2.FONT_HERSHEY_SIMPLEX
            color = (255, 255, 255)

            valueUpdated = False

            # run the model only on every 15th frame
            if frame_count % 15 == 0:
                temp_openness, temp_extraversion, temp_neuroticism, temp_agreeableness, temp_conscientiousness = neuralModel.predict_single_frame(
                    frame, neuralModelDict)

                if temp_openness["average"] != -1:
                    openness_average.append(temp_openness["average"])
                    if temp_openness["min"] < openness_min:
                        openness_min = temp_openness["min"]
                    if temp_openness["max"] > openness_max:
                        openness_max = temp_openness["max"]

                    valueUpdated = True

                if temp_extraversion["average"] != -1:
                    extraversion_average.append(temp_extraversion["average"])
                    if temp_extraversion["min"] < extraversion_min:
                        extraversion_min = temp_extraversion["min"]
                    if temp_extraversion["max"] > extraversion_max:
                        extraversion_max = temp_extraversion["max"]

                    valueUpdated = True

                if temp_agreeableness["average"] != -1:
                    agreeableness_average.append(temp_agreeableness["average"])
                    if temp_agreeableness["min"] < agreeableness_min:
                        agreeableness_min = temp_agreeableness["min"]
                    if temp_agreeableness["max"] > agreeableness_max:
                        agreeableness_max = temp_agreeableness["max"]

                    valueUpdated = True

                if temp_conscientiousness["average"] != -1:
                    conscientiousness_average.append(
                        temp_conscientiousness["average"])
                    if temp_conscientiousness["min"] < conscientiousness_min:
                        conscientiousness_min = temp_conscientiousness["min"]
                    if temp_conscientiousness["max"] > conscientiousness_max:
                        conscientiousness_max = temp_conscientiousness["max"]

                    valueUpdated = True

                if temp_neuroticism["average"] != -1:
                    neuroticism_average.append(temp_neuroticism["average"])
                    if temp_neuroticism["min"] < neuroticism_min:
                        neuroticism_min = temp_neuroticism["min"]
                    if temp_neuroticism["max"] > neuroticism_max:
                        neuroticism_max = temp_neuroticism["max"]

                    valueUpdated = True

                font = cv2.FONT_HERSHEY_SIMPLEX

            if valueUpdated:
                raw_data = {
                    'trait_name': [
                        'Openness', 'Extraversion', 'Agreeableness',
                        'Neuroticism', 'Conscientiousness'
                    ],
                    'avg': [
                        np.average(openness_average),
                        np.average(extraversion_average),
                        np.average(agreeableness_average),
                        np.average(neuroticism_average),
                        np.average(conscientiousness_average)
                    ],
                    'min': [
                        openness_min, extraversion_min, agreeableness_min,
                        neuroticism_min, conscientiousness_min
                    ],
                    'max': [
                        openness_max, extraversion_max, agreeableness_max,
                        neuroticism_max, conscientiousness_max
                    ]
                }
                df = pd.DataFrame(raw_data,
                                  columns=['trait_name', 'avg', 'min', 'max'])
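                # df holds one row per trait with its running avg / min / max; it is
                # handed to update_graph on a background thread, presumably so the
                # camera loop itself is not blocked by plotting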
                graphThread = threading.Thread(target=self.update_graph,
                                               args=(df, ))
                graphThread.start()

                self.Otable.setItem(
                    0, 0,
                    QtWidgets.QTableWidgetItem("{0:.4f}".format(
                        np.average(openness_average))))
                self.Otable.setItem(
                    1, 0,
                    QtWidgets.QTableWidgetItem("{0:.4f}".format(openness_min)))
                self.Otable.setItem(
                    2, 0,
                    QtWidgets.QTableWidgetItem("{0:.4f}".format(openness_max)))

                self.Atable.setItem(
                    0, 0,
                    QtWidgets.QTableWidgetItem("{0:.4f}".format(
                        np.average(agreeableness_average))))
                self.Atable.setItem(
                    1, 0,
                    QtWidgets.QTableWidgetItem(
                        "{0:.4f}".format(agreeableness_min)))
                self.Atable.setItem(
                    2, 0,
                    QtWidgets.QTableWidgetItem(
                        "{0:.4f}".format(agreeableness_max)))

                self.Etable.setItem(
                    0, 0,
                    QtWidgets.QTableWidgetItem("{0:.4f}".format(
                        np.average(extraversion_average))))
                self.Etable.setItem(
                    1, 0,
                    QtWidgets.QTableWidgetItem(
                        "{0:.4f}".format(extraversion_min)))
                self.Etable.setItem(
                    2, 0,
                    QtWidgets.QTableWidgetItem(
                        "{0:.4f}".format(extraversion_max)))

                self.Ctable.setItem(
                    0, 0,
                    QtWidgets.QTableWidgetItem("{0:.4f}".format(
                        np.average(conscientiousness_average))))
                self.Ctable.setItem(
                    1, 0,
                    QtWidgets.QTableWidgetItem(
                        "{0:.4f}".format(conscientiousness_min)))
                self.Ctable.setItem(
                    2, 0,
                    QtWidgets.QTableWidgetItem(
                        "{0:.4f}".format(conscientiousness_max)))

                self.Ntable.setItem(
                    0, 0,
                    QtWidgets.QTableWidgetItem("{0:.4f}".format(
                        np.average(neuroticism_average))))
                self.Ntable.setItem(
                    1, 0,
                    QtWidgets.QTableWidgetItem(
                        "{0:.4f}".format(neuroticism_min)))
                self.Ntable.setItem(
                    2, 0,
                    QtWidgets.QTableWidgetItem(
                        "{0:.4f}".format(neuroticism_max)))

            cv2.putText(
                frame, "Openness (Avg) = {0:.4f}".format(
                    np.average(openness_average)), (10, 20), font, 0.45, color)
            cv2.putText(
                frame, "Extraversion (Avg) = {0:.4f}".format(
                    np.average(extraversion_average)), (10, 40), font, 0.45,
                color)
            cv2.putText(
                frame, "Neuroticism (Avg) = {0:.4f}".format(
                    np.average(neuroticism_average)), (10, 60), font, 0.45,
                color)
            cv2.putText(
                frame, "Agreeableness (Avg) = {0:.4f}".format(
                    np.average(agreeableness_average)), (10, 80), font, 0.45,
                color)
            cv2.putText(
                frame, "Conscientiousness (Avg) = {0:.4f}".format(
                    np.average(conscientiousness_average)), (10, 100), font,
                0.45, color)

            cv2.putText(frame, "Openness (Min) = {0:.4f}".format(openness_min),
                        (10, 140), font, 0.45, color)
            cv2.putText(
                frame, "Extraversion (Min) = {0:.4f}".format(extraversion_min),
                (10, 160), font, 0.45, color)
            cv2.putText(frame,
                        "Neuroticism (Min) = {0:.4f}".format(neuroticism_min),
                        (10, 180), font, 0.45, color)
            cv2.putText(
                frame,
                "Agreeableness (Min) = {0:.4f}".format(agreeableness_min),
                (10, 200), font, 0.45, color)
            cv2.putText(
                frame,
                "Conscientiousness (Min) = {0:.4f}".format(conscientiousness_min),
                (10, 220), font, 0.45, color)

            cv2.putText(frame, "Openness (Max) = {0:.4f}".format(openness_max),
                        (10, 260), font, 0.45, color)
            cv2.putText(
                frame, "Extraversion (Max) = {0:.4f}".format(extraversion_max),
                (10, 280), font, 0.45, color)
            cv2.putText(frame,
                        "Neuroticism (Max) = {0:.4f}".format(neuroticism_max),
                        (10, 300), font, 0.45, color)
            cv2.putText(
                frame,
                "Agreeableness (Max) = {0:.4f}".format(agreeableness_max),
                (10, 320), font, 0.45, color)
            cv2.putText(
                frame,
                "Conscientiousness (Max) = {0:.4f}".format(conscientiousness_max),
                (10, 340), font, 0.45, color)

            frame_count += 1
            cv2.imshow("output", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        video_capture.release()
        cv2.destroyAllWindows()
        plt.close()
Ejemplo n.º 41
0
"""
Compute the average age of a given number of students
"""

from numpy import average

students = input('How many students do you have? ')  # note: this value is read but not used below
agesBySpace = input('Enter ages by space: ')

ages = list(map(int, agesBySpace.split(' ')))

result = average(ages)

print('The ages are {}, and the average is {}'.format(ages, result))
Ejemplo n.º 42
0
def compute_bit_error_rate(bits, received_bits):
    ''' Calculates the bit error rate '''
    number_of_bits = np.size(bits)
    return np.sum(np.bitwise_xor(bits, received_bits)) / number_of_bits


def plot_ber(SNRdB, ber):
    ''' Plot the bit error rate '''
    plt.figure(0, (16, 9))
    plt.semilogy(SNRdB, ber)
    plt.grid(True)
    plt.xlabel('SNR (dB)')
    plt.xlim((min(SNRdB), max(SNRdB)))
    plt.ylabel('Bit Error Rate')
    plt.title(f'{M}-PSK SNR vs BER')
    plt.show()


ber = []

for snr in SNR:
    errors = []
    for i in range(averages):
        bits = generate_bits(N)
        signal = modulate_bits(bits)
        received_signal = apply_channel(signal, snr)
        received_bits = demodulate_signal(received_signal)
        errors.append(compute_bit_error_rate(bits, received_bits))
    ber.append(np.average(errors))

plot_ber(SNRdB, ber)
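
# For a rough sanity check, the simulated curve could be compared against the
# closed-form AWGN result. A hedged sketch: it assumes BPSK (M = 2) and that
# SNRdB holds Eb/N0 in dB, which may not match the definitions used above.
#   from scipy.special import erfc
#   ber_theory = 0.5 * erfc(np.sqrt(10 ** (np.asarray(SNRdB) / 10)))
#   plt.semilogy(SNRdB, ber_theory, '--', label='BPSK theory')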
Ejemplo n.º 43
0
c = 1 / np.sqrt(epsilon_0 * mu_0)
ccm = 100 * c

print(dt)

print("a0:\t{}\nE_hartree:\t{}\n1 a.u:\t{}\ndt: {} fs\nc:\t{}e10 cm/s".format(
    a0, E_hartree, t_au, dt / 1e-15, ccm / 1e10))

# get cqs
cq1 = np.array(d.Cl1)
cq12 = np.array(d.Cl12)
cq18 = np.array(d.Cl18)
cq24 = np.array(d.Cl24)
# add and normalize
signal1 = (cq1)  # + cq12 + cq18 + cq24)
signal1 = signal1 - np.average(signal1)
signal1 = signal1 / max(signal1)
# fourier
fourier1 = np.absolute(fft.rfft(signal1))

signal12 = (cq12)  # + cq12 + cq18 + cq24)
signal12 = signal12 - np.average(signal12)
signal12 = signal12 / max(signal12)
# fourier
fourier12 = np.absolute(fft.rfft(signal12))

signal18 = (cq18)  # + cq12 + cq18 + cq24)
signal18 = signal18 - np.average(signal18)
signal18 = signal18 / max(signal18)
# fourier
fourier18 = np.absolute(fft.rfft(signal18))
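# A matching frequency axis for these spectra could be built with the rfftfreq
# helper (assuming `fft` is numpy.fft and `dt` is the sampling interval in seconds):
#   freqs = fft.rfftfreq(len(signal1), d=dt)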
Ejemplo n.º 44
0
def evaluate_beam_search(generator, data_loader, opt, title='', epoch=1, predict_save_path=None):
    logger = config.init_logging(title, predict_save_path + '/%s.log' % title, redirect_to_stdout=False)
    progbar = Progbar(logger=logger, title=title, target=len(data_loader.dataset.examples), batch_size=data_loader.batch_size,
                      total_examples=len(data_loader.dataset.examples))

    topk_range = [5, 10]
    score_names = ['precision', 'recall', 'f_score']

    example_idx = 0
    score_dict = {}  # {'precision@5':[],'recall@5':[],'f1score@5':[], 'precision@10':[],'recall@10':[],'f1score@10':[]}
    for i, batch in enumerate(data_loader):
        # if i > 5:
        #     break
        one2many_batch, one2one_batch = batch
        src_list, src_len, trg_list, _, trg_copy_target_list, src_oov_map_list, oov_list, src_str_list, trg_str_list = one2many_batch

        if torch.cuda.is_available():
            src_list = src_list.cuda()
            src_oov_map_list = src_oov_map_list.cuda()

        print("batch size - %s" % str(src_list.size(0)))
        print("src size - %s" % str(src_list.size()))
        print("target size - %s" % len(trg_copy_target_list))

        pred_seq_list = generator.beam_search(src_list, src_len, src_oov_map_list, oov_list, opt.word2id)

        '''
        process each example in current batch
        '''
        for src, src_str, trg, trg_str_seqs, trg_copy, pred_seq, oov in zip(src_list, src_str_list, trg_list, trg_str_list, trg_copy_target_list, pred_seq_list, oov_list):
            logger.info('======================  %d =========================' % (i))
            print_out = ''
            print_out += '[Source][%d]: %s \n' % (len(src_str), ' '.join(src_str))
            src = src.cpu().data.numpy() if torch.cuda.is_available() else src.data.numpy()
            print_out += '\nSource Input: \n %s\n' % (' '.join([opt.id2word[x] for x in src[:len(src_str) + 5]]))
            print_out += 'Real Target String [%d] \n\t\t%s \n' % (len(trg_str_seqs), trg_str_seqs)
            print_out += 'Real Target Input:  \n\t\t%s \n' % str([[opt.id2word[x] for x in t] for t in trg])
            print_out += 'Real Target Copy:   \n\t\t%s \n' % str([[opt.id2word[x] if x < opt.vocab_size else oov[x - opt.vocab_size] for x in t] for t in trg_copy])
            trg_str_is_present_flags, _ = if_present_duplicate_phrases(src_str, trg_str_seqs)

            # skip examples in which none of the target phrases appear in the source
            if opt.must_appear_in_src and np.sum(trg_str_is_present_flags) == 0:
                logger.error('found no present targets')
                continue

            print_out += '[GROUND-TRUTH] #(present)/#(all targets)=%d/%d\n' % (sum(trg_str_is_present_flags), len(trg_str_is_present_flags))
            print_out += '\n'.join(['\t\t[%s]' % ' '.join(phrase) if is_present else '\t\t%s' % ' '.join(phrase) for phrase, is_present in zip(trg_str_seqs, trg_str_is_present_flags)])
            print_out += '\noov_list:   \n\t\t%s \n' % str(oov)

            # 1st filtering
            pred_is_valid_flags, processed_pred_seqs, processed_pred_str_seqs, processed_pred_score = process_predseqs(pred_seq, oov, opt.id2word, opt)
            # 2nd filtering: optionally drop phrases that do not appear in the source text, and keep unique ones after stemming
            if opt.must_appear_in_src:
                pred_is_present_flags, _ = if_present_duplicate_phrases(src_str, processed_pred_str_seqs)
                filtered_trg_str_seqs = np.asarray(trg_str_seqs)[trg_str_is_present_flags]
            else:
                pred_is_present_flags = [True] * len(processed_pred_str_seqs)
                # keep all ground-truth phrases when presence in the source is not enforced
                filtered_trg_str_seqs = np.asarray(trg_str_seqs)

            valid_and_present = np.asarray(pred_is_valid_flags) * np.asarray(pred_is_present_flags)
            match_list = get_match_result(true_seqs=filtered_trg_str_seqs, pred_seqs=processed_pred_str_seqs)
            print_out += '[PREDICTION] #(valid)=%d, #(present)=%d, #(retained&present)=%d, #(all)=%d\n' % (sum(pred_is_valid_flags), sum(pred_is_present_flags), sum(valid_and_present), len(pred_seq))
            print_out += ''
            '''
            Print and export predictions
            '''
            preds_out = ''
            for p_id, (seq, word, score, match, is_valid, is_present) in enumerate(
                    zip(processed_pred_seqs, processed_pred_str_seqs, processed_pred_score, match_list, pred_is_valid_flags, pred_is_present_flags)):
                # if p_id > 5:
                #     break

                preds_out += '%s\n' % (' '.join(word))
                if is_present:
                    print_phrase = '[%s]' % ' '.join(word)
                else:
                    print_phrase = ' '.join(word)

                if is_valid:
                    print_phrase = '*%s' % print_phrase

                if match == 1.0:
                    correct_str = '[correct!]'
                else:
                    correct_str = ''
                if any([t >= opt.vocab_size for t in seq.sentence]):
                    copy_str = '[copied!]'
                else:
                    copy_str = ''

                print_out += '\t\t[%.4f]\t%s \t %s %s%s\n' % (-score, print_phrase, str(seq.sentence), correct_str, copy_str)

            '''
            Evaluate predictions w.r.t different filterings and metrics
            '''
            processed_pred_seqs = np.asarray(processed_pred_seqs)[valid_and_present]
            filtered_processed_pred_str_seqs = np.asarray(processed_pred_str_seqs)[valid_and_present]
            filtered_processed_pred_score = np.asarray(processed_pred_score)[valid_and_present]

            # 3rd round filtering (one-word phrases)
            num_oneword_seq = -1
            filtered_pred_seq, filtered_pred_str_seqs, filtered_pred_score = post_process_predseqs((processed_pred_seqs, filtered_processed_pred_str_seqs, filtered_processed_pred_score), num_oneword_seq)

            match_list_exact = get_match_result(true_seqs=filtered_trg_str_seqs, pred_seqs=filtered_pred_str_seqs, type='exact')
            match_list_soft = get_match_result(true_seqs=filtered_trg_str_seqs, pred_seqs=filtered_pred_str_seqs, type='partial')

            assert len(filtered_pred_seq) == len(filtered_pred_str_seqs) == len(filtered_pred_score) == len(match_list_exact) == len(match_list_soft)

            print_out += "\n ======================================================="
            print_pred_str_seqs = [" ".join(item) for item in filtered_pred_str_seqs]
            print_trg_str_seqs = [" ".join(item) for item in filtered_trg_str_seqs]
            # print_out += "\n PREDICTION: " + " / ".join(print_pred_str_seqs)
            # print_out += "\n GROUND TRUTH: " + " / ".join(print_trg_str_seqs)
            for topk in topk_range:
                results_exact = evaluate(match_list_exact, filtered_pred_str_seqs, filtered_trg_str_seqs, topk=topk)
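                # results_exact presumably holds (precision, recall, f_score) at cutoff
                # `topk`, in the order given by score_names above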
                for k, v in zip(score_names, results_exact):
                    if '%s@%d_exact' % (k, topk) not in score_dict:
                        score_dict['%s@%d_exact' % (k, topk)] = []
                    score_dict['%s@%d_exact' % (k, topk)].append(v)

                print_out += "\n ------------------------------------------------- EXACT, k=%d" % (topk)
                print_out += "\n --- batch precision, recall, fscore: " + str(results_exact[0]) + " , " + str(results_exact[1]) + " , " + str(results_exact[2])
                print_out += "\n --- total precision, recall, fscore: " + str(np.average(score_dict['precision@%d_exact' % (topk)])) + " , " +\
                            str(np.average(score_dict['recall@%d_exact' % (topk)])) + " , " +\
                            str(np.average(score_dict['f_score@%d_exact' % (topk)]))

            for topk in topk_range:
                results_soft = evaluate(match_list_soft, filtered_pred_str_seqs, filtered_trg_str_seqs, topk=topk)
                for k, v in zip(score_names, results_soft):
                    if '%s@%d_soft' % (k, topk) not in score_dict:
                        score_dict['%s@%d_soft' % (k, topk)] = []
                    score_dict['%s@%d_soft' % (k, topk)].append(v)

                print_out += "\n ------------------------------------------------- SOFT, k=%d" % (topk)
                print_out += "\n --- batch precision, recall, fscore: " + str(results_soft[0]) + " , " + str(results_soft[1]) + " , " + str(results_soft[2])
                print_out += "\n --- total precision, recall, fscore: " + str(np.average(score_dict['precision@%d_soft' % (topk)])) + " , " +\
                            str(np.average(score_dict['recall@%d_soft' % (topk)])) + " , " +\
                            str(np.average(score_dict['f_score@%d_soft' % (topk)]))

            print_out += "\n ======================================================="
            logger.info(print_out)

            '''
            write predictions to disk
            '''
            if predict_save_path:
                if not os.path.exists(os.path.join(predict_save_path, title + '_detail')):
                    os.makedirs(os.path.join(predict_save_path, title + '_detail'))
                with open(os.path.join(predict_save_path, title + '_detail', str(example_idx) + '_print.txt'), 'w') as f_:
                    f_.write(print_out)
                with open(os.path.join(predict_save_path, title + '_detail', str(example_idx) + '_prediction.txt'), 'w') as f_:
                    f_.write(preds_out)

                out_dict = {}
                out_dict['src_str'] = src_str
                out_dict['trg_str'] = trg_str_seqs
                out_dict['trg_present_flag'] = trg_str_is_present_flags
                out_dict['pred_str'] = processed_pred_str_seqs
                out_dict['pred_score'] = [float(s) for s in processed_pred_score]
                out_dict['present_flag'] = pred_is_present_flags
                out_dict['valid_flag'] = pred_is_valid_flags
                out_dict['match_flag'] = [float(m) for m in match_list]

                for k,v in out_dict.items():
                    out_dict[k] = list(v)
                    # print('len(%s) = %d' % (k, len(v)))

                # print(out_dict)

                assert len(out_dict['trg_str']) == len(out_dict['trg_present_flag'])
                assert len(out_dict['pred_str']) == len(out_dict['present_flag']) \
                       == len(out_dict['valid_flag']) == len(out_dict['match_flag']) == len(out_dict['pred_score'])

                with open(os.path.join(predict_save_path, title + '_detail', str(example_idx) + '.json'), 'w') as f_:
                    f_.write(json.dumps(out_dict))

            progbar.update(epoch, example_idx, [('f_score@5_exact', np.average(score_dict['f_score@5_exact'])),
                                                ('f_score@5_soft', np.average(score_dict['f_score@5_soft'])),
                                                ('f_score@10_exact', np.average(score_dict['f_score@10_exact'])),
                                                ('f_score@10_soft', np.average(score_dict['f_score@10_soft'])),])

            example_idx += 1

    # print('#(f_score@5#oneword=-1)=%d, sum=%f' % (len(score_dict['f_score@5#oneword=-1']), sum(score_dict['f_score@5#oneword=-1'])))
    # print('#(f_score@10#oneword=-1)=%d, sum=%f' % (len(score_dict['f_score@10#oneword=-1']), sum(score_dict['f_score@10#oneword=-1'])))
    # print('#(f_score@5#oneword=1)=%d, sum=%f' % (len(score_dict['f_score@5#oneword=1']), sum(score_dict['f_score@5#oneword=1'])))
    # print('#(f_score@10#oneword=1)=%d, sum=%f' % (len(score_dict['f_score@10#oneword=1']), sum(score_dict['f_score@10#oneword=1'])))

    if predict_save_path:
        # export scores. Each row is scores (precision, recall and f-score) of different way of filtering predictions (how many one-word predictions to keep)
        with open(predict_save_path + os.path.sep + title + '_result.csv', 'w') as result_csv:
            csv_lines = []
            for mode in ["exact", "soft"]:
                for topk in topk_range:
                    csv_line = ""
                    for k in score_names:
                        csv_line += ',%f' % np.average(score_dict['%s@%d_%s' % (k, topk, mode)])
                    csv_lines.append(csv_line + '\n')

            result_csv.writelines(csv_lines)

    # precision, recall, f_score = macro_averaged_score(precisionlist=score_dict['precision'], recalllist=score_dict['recall'])
    # logging.info("Macro@5\n\t\tprecision %.4f\n\t\tmacro recall %.4f\n\t\tmacro fscore %.4f " % (np.average(score_dict['precision@5']), np.average(score_dict['recall@5']), np.average(score_dict['f1score@5'])))
    # logging.info("Macro@10\n\t\tprecision %.4f\n\t\tmacro recall %.4f\n\t\tmacro fscore %.4f " % (np.average(score_dict['precision@10']), np.average(score_dict['recall@10']), np.average(score_dict['f1score@10'])))
    # precision, recall, f_score = evaluate(true_seqs=target_all, pred_seqs=prediction_all, topn=5)
    # logging.info("micro precision %.4f , micro recall %.4f, micro fscore %.4f " % (precision, recall, f_score))

    for k,v in score_dict.items():
        print('#(%s) = %d' % (k, len(v)))

    return score_dict
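
# A minimal usage sketch (hypothetical names; the surrounding driver script is not
# part of this snippet):
#   score_dict = evaluate_beam_search(generator, test_data_loader, opt,
#                                     title='predict', epoch=1,
#                                     predict_save_path=opt.pred_path)
#   print('macro f_score@5 (exact) = %f' % np.average(score_dict['f_score@5_exact']))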
Ejemplo n.º 45
0
            next_S, R, terminate, timeout = env.step(A)
            if verbose:
                print("Old state:", np.round(old_state, 3), "-->", "Action:",
                      A, "-->", "New state:", np.round(next_S, 3))
            cumulative_reward += R
            if terminate or timeout:
                if verbose:
                    print("\n## Reset ##\n")
                if terminate:
                    terminations += 1
                    successful_episode_steps.append(env.episode_step_count)
                env.reset()

        print("Number of steps per episode:", summary['steps_per_episode'])
        print("Number of episodes that reached the end:", terminations)
        average_length = np.average(successful_episode_steps) if len(
            successful_episode_steps) > 0 else np.inf
        print("The average number of steps per episode was:", average_length)
        print("Cumulative reward:", cumulative_reward)
        print("\n\n")

    if pumping_action_test:
        print("==== Results with Pumping Action Policy ====")

        config.current_step = 0
        summary = {}

        env = MountainCar(config, summary=summary)

        for i in range(steps):
            current_state = env.get_current_state()