Example #1
    def __call__(self, requestsize=1, write=False):
        """Block the calling program if the throttle time has not expired.

        Parameter requestsize is the number of Pages to be read/written;
        multiply delay time by an appropriate factor.

        Because this seizes the throttle lock, it will prevent any other
        thread from writing to the same site until the wait expires.

        """
        self.lock.acquire()
        try:
            wait = self.waittime(write=write)
            # Calculate the multiplicity of the next delay based on how
            # big the request is that is being posted now.
            # We want to add "one delay" for each factor of two in the
            # size of the request. Getting 64 pages at once allows 6 times
            # the delay time for the server.
            self.next_multiplicity = math.log(1 + requestsize) / math.log(2.0)

            self.wait(wait)

            if write:
                self.last_write = time.time()
            else:
                self.last_read = time.time()
        finally:
            self.lock.release()
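As a quick check of the comment above (not part of the original class): the multiplicity grows by one for each doubling of the request size, and a 64-page request yields roughly six delays.

import math

for requestsize in (1, 2, 64):
    # log2(1 + requestsize): 1 -> 1.0, 2 -> ~1.58, 64 -> ~6.02
    print(requestsize, math.log(1 + requestsize) / math.log(2.0))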
Example #2
def get_nth_prime(n):
    """
    Get nth prime number
    >>> get_nth_prime(-1)
    Traceback (most recent call last):
    ...
    ValueError: n is -1 but expected to be a positive integer
    >>> get_nth_prime(4)
    7
    >>> get_nth_prime(10001)
    104743
    >>> get_nth_prime(50000)
    611953
    """
    if n < 1 or n != int(n):
        raise ValueError("n is {0} but expected to be a positive integer".format(n))
    if n == 1:
        return 2
    if n == 2:
        return 3
    if n == 3:
        return 5
    if n == 4:
        return 7
    if n == 5:
        return 11
    import math

    upper = n * math.log(n)
    upper += upper * math.log(n)
    upper = math.ceil(upper)  # use Rosser's theorem for the upper bound
    return get_primes(upper)[n - 1]
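get_primes is not shown in this example; a minimal stand-in, assuming it should return every prime up to and including its argument (a plain sieve of Eratosthenes), could look like this:

def get_primes(upper):
    """Return a list of all primes <= upper (simple sieve of Eratosthenes)."""
    upper = int(upper)
    if upper < 2:
        return []
    sieve = [True] * (upper + 1)
    sieve[0] = sieve[1] = False
    for i in range(2, int(upper ** 0.5) + 1):
        if sieve[i]:
            for j in range(i * i, upper + 1, i):
                sieve[j] = False
    return [i for i, is_prime in enumerate(sieve) if is_prime]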
Example #3
def entropy_root(lstOfLst):
      target_lst = []
      dict = {'0':0}
      rows = len(lstOfLst) - 1
      target_col = len(lstOfLst[0]) - 1
#      print "Rows in example set: ",rows+1," and index of target set: ", target_col
      n = 0
      entropy = 0
      while n <= rows:
          target_lst.append(lstOfLst[n][target_col])
          n = n +1
      target_lst1 = list(set(target_lst))    
#      print target_lst,target_lst1
      n1 = len(target_lst1)
      n2 = len(target_lst)
      while n1 > 0:
          k = n2
          cnt = 0
          while k > 0:
              if target_lst[k-1] == target_lst1[n1-1]:
                  cnt = cnt + 1
#                  print "Inside count",cnt,target_lst[k-1],target_lst1[n1-1]
              k = k - 1 
          dict.update({target_lst1[n1-1]:cnt})    
          n1 = n1 -1
#      print dict    
      n1 = len(target_lst1)
      while n1 > 0:
#          chr = str(dict[target_lst1[n1 - 1]])
#          print dict[target_lst1[n1 - 1]]
          p = float(dict[target_lst1[n1 - 1]])/float((rows + 1))
#          print p
          entropy = entropy + (p * (math.log(p)/math.log(2)))
          n1 = n1 - 1
      return (-1*entropy)
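For comparison, the same entropy of the last (target) column can be computed more compactly; a sketch using collections.Counter instead of the manual counting loops above:

import math
from collections import Counter

def entropy_root_compact(lst_of_lst):
    # Count the values appearing in the last column of every row.
    counts = Counter(row[-1] for row in lst_of_lst)
    total = sum(counts.values())
    # Shannon entropy in bits: -sum(p * log2(p))
    return -sum((c / total) * math.log(c / total, 2) for c in counts.values())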
Example #4
 def _get_value_log(self, x, mu, v):
     """log basic 2"""
     try:
         return loggamma(x+v) - loggamma(x+1) - loggamma(v) + v*log(v) - v*log(v+mu) + x*log(mu) - x*log(v+mu)
     except ValueError:
         #print('_get_value_log ValueError', x, mu, v, file=sys.stderr)
         return 1
Example #5
	def getFitness(self, tagList):

		tagList = list(tagList)

		# add start symbols and end symbols
		for i in range(self.N - 1):
			tagList.insert(0, '^')
			tagList.append('$')

		# initialize the variables
		answer = float(0.0)

		# calculate numerator & denominator
		length = len(tagList)
		# print "----- before calculation -----"
		for start in range(length - self.N + 1):
			tmp = []
			for index in range(self.N):
				tmp.append(tagList[start+index]) 
			gramTuple = tuple(tmp) # now gramTuple is the tuple for this NGRAM (self).

			gramTupleProb = self.getProb(gramTuple)
			answer += math.log(gramTupleProb)
			if start != 0:
				prefixGramTuple = self.getPrefixGram(gramTuple)
				prefixGramTupleProb = self.prefixNGRAM.getProb(prefixGramTuple)
				answer -= math.log(prefixGramTupleProb)
			# print "numerator = %f, denominator = %f, answer = %f" % (numerator, denominator, answer)
		# print "----- after calculation -----"
		# special case

		return answer
Example #6
 def logadd(x,y):
     """ A helper function for log addition """
     from math import log,exp
     if x>y:
         return x+log(1.+exp(y-x))
     else:
         return y+log(1.+exp(x-y))
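A short usage check (values chosen here for illustration): logadd(x, y) returns log(exp(x) + exp(y)) while avoiding overflow for large arguments.

from math import log

print(logadd(log(2.0), log(3.0)))   # ~1.6094, i.e. log(5)
print(log(5.0))                     # same value
print(logadd(1000.0, 1000.0))       # ~1000.6931 = 1000 + log(2); exp(1000) alone would overflow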
Example #7
def make_dictionary(file_array):

	cufflinks_dict={}

	for i in range(0,len(file_array)):
		if file_array[i] != '':
			prelim_info_list=[]
			each_gene_list=file_array[i].split("\t")

			try:
				##Preliminary Info
				entry_name=each_gene_list[0]
				Gene_ID=each_gene_list[3]
				Gene_Name=each_gene_list[4]
				tss_id=each_gene_list[5]
				locus=each_gene_list[6]
				length=each_gene_list[7]
				coverage=each_gene_list[8]
				FPKM=each_gene_list[9]

				log2_FPKM=math.log(float(FPKM)+1)/math.log(2)

				if entry_name not in cufflinks_dict:
					cufflinks_dict[entry_name]=entry_name+"\t"+Gene_ID+"\t"+Gene_Name+"\t"+tss_id+"\t"+locus+"\t"+str("%.4f" % log2_FPKM)
				else:
					pass
			except:
				pass

	return cufflinks_dict
Example #8
 def increase(self):
     value=math.log(self.read())
     value+=.25
     if math.exp(value)>=self.maxvalue:
         value=math.log(self.maxvalue)
     print (value)
     self.write(int(math.exp(value)))
Example #9
File: main.py Project: fei6409/IR
def TF_IDF():
    print('Doing TF_IDF', file=sys.stderr)
    global TFIDF, docWeight, index

    if os.path.isfile('TFIDF.dat') and os.path.isfile('docWeight.dat') and os.path.isfile('index.dat'): 
        f = open('TFIDF.dat', 'rb')
        TFIDF = pickle.load(f)
        f.close()
        f = open('docWeight.dat', 'rb')
        docWeight = pickle.load(f)
        f.close()
        f = open('index.dat', 'rb')
        index = pickle.load(f)
        f.close()

    else:
        print('.dat not exist, generating', file=sys.stderr)

        TFIDF = {}
        docCnt = len(docSize)
        avgSize = 0
        index = [[] for i in range(docCnt)]
        for i in range(docCnt):
            avgSize += docSize[i]
        avgSize /= docCnt
        
        docWeight = [0 for i in range(docCnt)]
        para_b = 0.7 # tuning
        d = [(1 - para_b + para_b*docSize[i]/avgSize) for i in range(docCnt)]


        for i in invIndexUnigram: # word id
            IDF = math.log( docCnt / len(invIndexUnigram[i]) )
            TFIDF[i] = {}
            for j in invIndexUnigram[i]: # doc id
                v =  (invIndexUnigram[i][j] / d[j]) * IDF
                TFIDF[i][j] = v
                docWeight[j] += v * v
                index[j].append(i)

        for i in invIndexBigram: # word id
            IDF = math.log( docCnt / len(invIndexBigram[i]) )
            TFIDF[i] = {}
            for j in invIndexBigram[i]: # doc id
                v =  (invIndexBigram[i][j] / d[j]) * IDF
                TFIDF[i][j] = v
                docWeight[j] += v * v
                index[j].append(i)

        f = open('TFIDF.dat', 'wb')
        pickle.dump(TFIDF, f)
        f.close()
        f = open('docWeight.dat', 'wb')
        pickle.dump(docWeight, f)
        f.close()
        f = open('index.dat', 'wb')
        pickle.dump(index, f)
        f.close()

    printTime()
Example #10
    def optimize_hyperparameters(self, samples=5, step=3.0):
        old_hyper_parameters = [math.log(self._alpha_alpha), math.log(self._alpha_beta)]
        
        for ii in xrange(samples):
            log_likelihood_old = self.compute_likelihood(self._alpha_alpha, self._alpha_beta)
            log_likelihood_new = math.log(random.random()) + log_likelihood_old
            #print("OLD: %f\tNEW: %f at (%f, %f)" % (log_likelihood_old, log_likelihood_new, self._alpha_alpha, self._alpha_beta))

            l = [x - random.random() * step for x in old_hyper_parameters]
            r = [x + step for x in old_hyper_parameters]

            for jj in xrange(self._alpha_maximum_iteration):
                new_hyper_parameters = [l[x] + random.random() * (r[x] - l[x]) for x in xrange(len(old_hyper_parameters))]
                trial_alpha, trial_beta = [math.exp(x) for x in new_hyper_parameters]
                lp_test = self.compute_likelihood(trial_alpha, trial_beta)

                if lp_test > log_likelihood_new:
                    self._alpha_alpha = math.exp(new_hyper_parameters[0])
                    self._alpha_beta = math.exp(new_hyper_parameters[1])
                    #self._alpha_sum = self._alpha_alpha * self._K
                    #self._beta_sum = self._alpha_beta * self._number_of_language_types
                    old_hyper_parameters = [math.log(self._alpha_alpha), math.log(self._alpha_beta)]
                    break
                else:
                    for dd in xrange(len(new_hyper_parameters)):
                        if new_hyper_parameters[dd] < old_hyper_parameters[dd]:
                            l[dd] = new_hyper_parameters[dd]
                        else:
                            r[dd] = new_hyper_parameters[dd]
                        assert l[dd] <= old_hyper_parameters[dd]
                        assert r[dd] >= old_hyper_parameters[dd]

            print("\nNew hyperparameters (%i): %f %f" % (jj, self._alpha_alpha, self._alpha_beta))
Example #11
def compute_disp_ntaps(dm,bw,freq):
	NTLIMIT=65536*2
	#
	# Dt calculations are in MHz, rather than Hz
	#    crazy astronomers....
	mbw = bw/1.0e6
	mfreq = freq/1.0e6

	f_lower = mfreq-(mbw/2)
	f_upper = mfreq+(mbw/2)

	# Compute smear time
	Dt = dm/2.41e-4 * (1.0/(f_lower*f_lower)-1.0/(f_upper*f_upper))

	# ntaps is now bandwidth*smeartime
	ntaps = bw*Dt
	if (ntaps < 32):
		ntaps = 32
	# special "flag" from command-line invoker to get around a bug
	#   in Gnu Radio involving the FFT filter implementation
	#   we can *never* increase the size of an FFT filter at runtime
	#   but can decrease it.  So there's a special "startup" flag (dm=1500.0)
	#   that causes us to return the NTLIMIT number of taps
	#
	if (dm >= 1500.0):
		ntaps = NTLIMIT
	if (ntaps > NTLIMIT):
		ntaps = NTLIMIT
	ntaps = int(math.log(ntaps) / math.log(2))
	ntaps = int(math.pow(2,ntaps+1))
	return(int(ntaps))
Example #12
 def __call__(self, state, scope, pos, paramTypes, datum, classModel):
     ll = 0.0
     if isinstance(datum, list) or isinstance(datum, tuple):
         if len(datum) != len(classModel):
             raise PFARuntimeException("datum and classModel misaligned", self.errcodeBase + 0, self.name, pos)
         for i, x in enumerate(datum):
             mu   = classModel[i]["mean"]
             vari = classModel[i]["variance"]
             if vari <= 0.0:
                 raise PFARuntimeException("variance less than or equal to zero", self.errcodeBase + 1, self.name, pos)
             ll += -0.5*math.log(2.*math.pi * vari)
             ll += -0.5*((x - mu)**2 / vari)
         return ll
     else:
         datumkeys = datum.keys()
         modelkeys = classModel.keys()
         if set(datumkeys) != set(modelkeys):
             raise PFARuntimeException("datum and classModel misaligned", self.errcodeBase + 0, self.name, pos)
         for feature in datumkeys:
             x    = datum[feature]
             mu   = classModel[feature]["mean"]
             vari = classModel[feature]["variance"]
             if vari <= 0.0:
                 raise PFARuntimeException("variance less than or equal to zero", self.errcodeBase + 1, self.name, pos)
             ll += -0.5*math.log(2.*math.pi * vari)
             ll += -0.5*((x - mu)**2 / vari)
         return ll
Example #13
def mdl (g):
    """
    the Minimum Description Length calculator for Bayesian network g
    """
    n = len (g.V) # the variable count
    N = len (g.data)# the sample number
    
    logn = math.log (n, 2) # value of log (n)
    logN = math.log (N, 2) # value of log (N)
    
    complexity = sum([logn * len(g.getParentOf(v)) + logN / 2 * product (g.getParentOf(v).cards()) * (v.card - 1) 
                      for v in g.V]) 
    
    logll = 0 #log likelihood 
    for v in g.V:
        for parentVals in g.getParentOf (v).allAssignments ():
            for val in v.values:
                # assignment of the parent
                parentAssignments = dict(zip(map(lambda p: p.var, g.getParentOf (v)), parentVals))
                
                assignments = parentAssignments.copy () #including the child value in the assignment
                assignments[v.var] = val
                
                #the empirical count of the given assignments of parent
                parentN = g.N (**parentAssignments)
                
                #the empirical count of the given assignments of parent and child
                childN = g.N (**assignments)

                if childN != 0:
                    logll += (childN * math.log (childN / parentN, 2))                
                else:
                    pass #nothing happens

    return -logll + complexity
Example #14
def command_line(veb, ra, ov, pr):
	l = len(sys.argv)
	for i in xrange(1, l):
		if not is_switch(sys.argv[i]):
			break

	for j in xrange(i, l): # Start with the first non-switch
		if j != i: # Pretty printing
			print
		response = sys.argv[j]
		if valid_input(response):
			response = response.replace('^', '**')
			try:
				n = eval(response)
				int(n)
			except (SyntaxError, TypeError, ValueError):
				help()
		else:
			help()

		print	'Factoring %d:' % n
		if n < 0:
			print	-1
			n = -n
		if n == 0:
			print	'0 does not have a well-defined factorization.'
			continue
		elif n == 1:
			print	1
			continue

		if ov == DUMMY:
			ov = 2*math.log(math.log(n))
		for factor in factors(n, veb, ra, ov, pr):
			print factor
Example #15
def could_be_prime(n):
	'''Performs some trials to compute whether n could be prime. Run time is O(N^3 / (log N)^2) for N bits.

Returns whether it is possible for n to be prime (True or False).
'''
	if n < 2:
		return False
	if n == 2:
		return True
	if not n & 1:
		return False

	product = ONE
	log_n = int(math.log(n)) + 1
	bound = int(math.log(n) / (LOG_2 * math.log(math.log(n))**2)) + 1
	if bound * log_n >= n:
		bound = 1
		log_n = int(sqrt(n))
	prime_bound = 0
	prime = 3

	for _ in xrange(bound):
		p = []
		prime_bound += log_n
		while prime <= prime_bound:
			p.append(prime)
			prime = next_prime(prime)
		if p != []:
			p = prod(p)
			product = (product * p) % n

	return gcd(n, product) == 1
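The helpers referenced above (ONE, LOG_2, prod, next_prime, gcd) live elsewhere in that project; plausible minimal stand-ins (assumptions: ONE is the plain integer 1, though the original may use a big-integer type, and next_prime is a naive trial-division search) might look like this in Python 3:

import math
from functools import reduce

ONE = 1                    # assumption: the original may use a gmpy/mpz constant here
LOG_2 = math.log(2)
gcd = math.gcd

def prod(numbers):
    """Product of a list of integers."""
    return reduce(lambda a, b: a * b, numbers, 1)

def next_prime(p):
    """Smallest prime strictly greater than p (naive trial division)."""
    candidate = max(p + 1, 2)
    while True:
        if candidate == 2 or (candidate % 2 and
                              all(candidate % d for d in range(3, int(candidate ** 0.5) + 1, 2))):
            return candidate
        candidate += 1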
Example #16
	def compute_accuracy(self, x_original, y_original, t0, t1, h, base=math.e):
		p = plotter()
		# The estimated function is N1
		[x, y] = p.hist2fun(history=h)
		x_temp = x[0:1] + [x[2*i+1] for i in range(len(x)/2)]
		x = x_temp
		y_temp = y[0:1] + [y[2*i+1] for i in range(len(y)/2)]
		y = y_temp
		# Now we merge the two vectors x_original and x, then we add t0 and t1
		x_temp = x_original + x
		x_temp = set(x_temp) # removing duplicates
		x_temp = list(x_temp)
		x_temp.sort()
		#print x_temp
		inf = np.array(x_temp)>t0
		sup = np.array(x_temp)<t1
		inf_sup = inf*sup
		x_temp = set(inf_sup * np.array(x_temp))
		x_temp = list(x_temp)
		x_temp[0] = t0
		x_temp.append(t1)
		x_vect = x_temp  # Now we have the vector. We can compute the formula
		sum_temp = 0
		for i in range(len(x_vect)-1):
			N0_i = float(self.evaluate_func(x_original, y_original, x_vect[i]))
			N1_i = float(self.evaluate_func(x, y, x_vect[i]))
			sum_temp += (abs(N0_i-N1_i)/(N0_i+N1_i))*(math.log(x_vect[i+1], base)-math.log(x_vect[i], base))
		result = float(sum_temp)/(math.log(t1, base)-math.log(t0, base))
		return result
Example #17
 def relate(size, base):
     if size == 0:
         return base
     size = float(size)
     base = float(base)
     if abs(size - base) < 0.1:
         return 0
     sign = -1 if size < base else 1
     endp = 0 if size < base else 36
     diff = (abs(base - size) * 3) + ((36 - size) / 100)
     logb = abs(base - endp)
     if logb == 1.0:
         logb = 1.1
     try:
         result = sign * math.log(diff, logb)
     except ValueError:
         if diff < 0:
             # Size is both very large and close to base
             return 0
         if logb == 0:
             logb = 1e-6
         if diff == 0:
             diff = 1e-6
         result = sign * math.log(diff, logb)
     return result
def get_decision_given_context(theta, type, decision, context):
    global cache_normalizing_decision, feature_index, source_to_target_firing, model1_probs, ets
    m1_event_prob = model1_probs.get((decision, context), 0.0)
    fired_features = get_wa_features_fired(type=type, decision=decision, context=context,
                                           dictionary_features=dictionary_features, ishybrid=True)

    theta_dot_features = sum([theta[feature_index[f]] * f_wt for f_wt, f in fired_features])
    numerator = m1_event_prob * exp(theta_dot_features)
    if (type, context) in cache_normalizing_decision:
        denom = cache_normalizing_decision[type, context]
    else:
        denom = ets[context]
        target_firings = source_to_target_firing.get(context, set([]))
        for tf in target_firings:
            m1_tf_event_prob = model1_probs.get((tf, context), 0.0)
            tf_fired_features = get_wa_features_fired(type=type, decision=tf, context=context,
                                                      dictionary_features=dictionary_features, ishybrid=True)
            tf_theta_dot_features = sum([theta[feature_index[f]] * f_wt for f_wt, f in tf_fired_features])
            denom += m1_tf_event_prob * exp(tf_theta_dot_features)
        cache_normalizing_decision[type, context] = denom
    try:
        log_prob = log(numerator) - log(denom)
    except ValueError:
        print numerator, denom, decision, context, m1_event_prob, theta_dot_features
        raise BaseException
    return log_prob
Example #19
def compress(temp, press=70., sali=0):
    '''Compute water or brine  compressibility from temperature, pressure
    and salinity,     according to Spivey et al (2004).

    temp:   temperature in degrees Celsius.
    press:  pressure in MPa.
    sali:   Concentration of NaCl in ppm.'''

    mols = mol(sali)

    # If salinity == 0, the fluid is water, so we compute the water
    # compressibility
    if sali == 0:
        compress = (1. / coef(EwT, temp)) * log(abs(coef(EwT, temp) * (press / 70.) \
                + coef(FwT, temp)))

    else:
        Fb = Fw
        for j in range(len(Fcm)):
            Fb += coef(Fcm[j], temp) * mols ** (j / 2. + 0.5)

        Eb = Ew + coef(Ecm, temp) * mols

        compress = (1. / Eb) * log(abs(Eb * (press / 70.) + Fb))

    return compress
def lp(cs, C, ls, ps):
    """
    Args:
        cs: a list containing the cost of probing `X_1, ..., X_n`
        C: the cost budget
        ls: a list of the lengths of the intervals `I_1, ..., I_m`. Each
            element of the list contains the length of the corresponding
            interval.
        ps: a list of functions, each of which takes that take in one argument
            `j` and returns `Pr[X_i >= a_j]`
    Returns:
        a triple of type `(pulp.LpVariable, list of pulp.LpVariable,
        pulp.LpProblem)` with values of `(z, list of y_i,
        unsolved linear program)`.
    """
    assert len(ps) == len(cs)
    n = len(ps)
    m = len(ls)

    problem = pulp.LpProblem('Step 1', pulp.LpMinimize)

    z = pulp.LpVariable('z', cat='Integer')
    ys = [pulp.LpVariable('y' + str(i), lowBound=0, upBound=1, cat='Integer')
          for i in xrange(n)]

    problem += z
    for j in xrange(1, m + 1):
        aa = (math.log(1.0 / p(j)) for p in ps)
        problem += pulp.lpDot(ys, aa) <= math.log(ls[j - 1]) - z, 'j=' + str(j)
    problem += pulp.lpDot(cs, ys) <= C, 'cost'

    return z, ys, problem
Example #21
 def energy(self):
     sum = 0.0
     sum -= di.norm.logpdf(self.data, loc=self.mu, scale=self.sigma).sum()
     #Now add in the priors...
     sum -= log(self.sigma)*(-0.5) - self.nu/2 * (self.mu-self.priormu)**2/self.sigma
     sum -= log(self.sigma)*(self.kappa+2)/(-2) - 0.5*self.priorsigma/self.sigma
     return sum
Example #22
def estimDiv(c, psmc, r, t):
    """Estimate divergence using eq 12
    """
    N0 = 0
    if psmc:
        if not r:
            # parse psmc
            f = open(psmc, 'r')
            line = f.readline().split("-eN ")
            t = [float(i.split()[0]) for i in line[1:]]
            t.insert(0, 0.0)
            r = [float(i.split()[1]) for i in line[1:]]
            N0 = float(line[0].split()[1]) / float(line[0].split()[4])
            r.insert(0, 1.0)
        i = 0
        nc = 1.0
        while (1-nc*exp(-(t[i+1]-t[i])/r[i])) < c:
            nc *= exp(-(t[i+1]-t[i])/r[i])
            i += 1
            #print("i:{}, t[i]:{}, t[i+1]:{}, r[i]:{}, nc:{}".format(i, t[i], t[i+1], r[i], nc))
        j = i
        print("nc = {}, 1-nc = {}".format(nc, 1-nc))
        T_hat = -r[j]*log((1-c) / nc) + t[j]
    else:
        T_hat = -log(1-c)  # assumes constant popsize
    return(r, t, N0, T_hat)
Example #23
    def __init__(self, ref_file, max_n=100, verbose=False):
        '''
        Read the reference file and store wordcounts as class variables:
        - a dictionary mapping words to their log probabilities
        - a dictionary mapping character patterns (e.g. 'abccda' for 'dotted')
          to a list of words and their log probabilities, sorted by probability
        '''
        self.max_n = max_n
        self.verbose = verbose
        if self.verbose:
            print 'processing reference file...'

        # Get words and word probabilities from text and put in dictionary
        self.vectorizer = CountVectorizer(token_pattern=r'(?u)\b[a-zA-Z]+\b')
        wordcounts = self.__get_wordcounts(ref_file)
        self.word_dict = {word:math.log(count+1.0) for count, word in wordcounts}

        # Also put words and probabilities into the dictionary keyed by pattern
        self.words_by_pattern = {}
        for count, word in wordcounts:
            pattern = self.__word_to_pattern(word)
            prob = math.log(count+1.0)
            if pattern in self.words_by_pattern:
                self.words_by_pattern[pattern].append((prob, word))
            else:
                self.words_by_pattern[pattern] = [(prob, word)]

        # Initial null solution
        self.solution = None
        if self.verbose:
            print '...done\n'
Example #24
def make_non_differential_constellation(m, gray_coded):
    side = int(pow(m, 0.5))
    if (not isinstance(m, int) or m < 4 or not is_power_of_four(m)):
        raise ValueError("m must be a power of 4 integer.")
    # Each symbol holds k bits.
    k = int(log(m) / log(2.0))
    if gray_coded:
        # Number rows and columns using gray codes.
        gcs = gray_code(side)
        # Get inverse gray codes.
        i_gcs = mod_codes.invert_code(gcs)
    else:
        i_gcs = range(0, side)
    # The distance between points is found.
    step = 2.0/(side-1)

    gc_to_x = [-1 + i_gcs[gc]*step for gc in range(0, side)]
    # First k/2 bits determine x position.
    # Following k/2 bits determine y position.
    const_map = []
    for i in range(m):
        y = gc_to_x[get_bits(i, 0, k/2)]
        x = gc_to_x[get_bits(i, k/2, k/2)]
        const_map.append(complex(x,y))
    return const_map
Example #25
def predict_class(prediction, prob_other, class_doc_stats, class_prob, word_list, word_dict):
    prob_values = []
    new_prob_values = []
    for class_name in class_prob:
        prob_values.append((class_name, class_prob[class_name]))
    inpfile = open("stopWords.txt", "r")
    line = inpfile.readline()
    stopWords = []
    while line:
        stopWord = line.strip()
        stopWords.append(stopWord)
        line = inpfile.readline()
    inpfile.close()
    for val in prob_values:
        prob = math.log(val[1], 2)
        class_name = val[0]
        for word in word_list:
            word = word.lower()
            # val = re.search(r"^[a-zA-Z][a-zA-Z0-9]*[a-zA-Z]+[a-zA-Z0-9]*$", word)
            # if (word in stopWords):
            #	continue
            if word in word_dict:
                prob = prob + math.log(Decimal(word_dict[word][class_name]), 2)
            else:
                prob = prob + math.log(Decimal(prob_other[class_name]), 2)
        new_prob_values.append((class_name, prob))
    prob_values = new_prob_values
    prob_values.sort(key=lambda tup: tup[1], reverse=True)
    return prob_values, prob_values[0][0]
Example #26
    def __call__(self, x, pos=None):
        "Return the format for tick val x at position pos"

        vmin, vmax = self.axis.get_view_interval()
        vmin, vmax = mtransforms.nonsingular(vmin, vmax, expander=0.05)
        d = abs(vmax - vmin)
        b = self._base
        if x == 0:
            return "0"
        sign = np.sign(x)
        # only label the decades
        fx = math.log(abs(x)) / math.log(b)
        isDecade = self.is_decade(fx)
        if not isDecade and self.labelOnlyBase:
            s = ""
        # if 0: pass
        elif fx > 10000:
            s = "%1.0e" % fx
        # elif x<1: s = '$10^{%d}$'%fx
        # elif x<1: s =  '10^%d'%fx
        elif fx < 1:
            s = "%1.0e" % fx
        else:
            s = self.pprint_val(fx, d)
        if sign == -1:
            s = "-%s" % s

        return self.fix_minus(s)
Example #27
    def __call__(self, x, pos=None):
        "Return the format for tick val x at position pos"
        b = self._base
        # only label the decades
        if x == 0:
            return "$0$"
        sign = np.sign(x)
        fx = math.log(abs(x)) / math.log(b)
        isDecade = self.is_decade(fx)

        usetex = rcParams["text.usetex"]

        if sign == -1:
            sign_string = "-"
        else:
            sign_string = ""

        if not isDecade and self.labelOnlyBase:
            s = ""
        elif not isDecade:
            if usetex:
                s = r"$%s%d^{%.2f}$" % (sign_string, b, fx)
            else:
                s = "$\mathdefault{%s%d^{%.2f}}$" % (sign_string, b, fx)
        else:
            if usetex:
                s = r"$%s%d^{%d}$" % (sign_string, b, self.nearest_long(fx))
            else:
                s = r"$\mathdefault{%s%d^{%d}}$" % (sign_string, b, self.nearest_long(fx))

        return s
Example #28
    def __next__(self):
        rv = self.value

        #------------------------------------------------------------------------
        # need to round or we might succumb to the dreaded python rounding
        # error (eg 0.99999 < 0 when multiplying 1/24.0 by 24)
        #------------------------------------------------------------------------
        if round(self.pos, 8) >= round(self.length_cur, 8):
            self.value = 1.0
            rv = 1.0
            self.pos = 0
            self.length_cur = Pattern.value(self.length)
            amp_cur = Pattern.value(self.amp)
            rate_start = 1.0
            rate_end = 1.0 + amp_cur
            steps = TICKS_PER_BEAT * self.length_cur
            self.dv = math.exp(math.log(rate_end / rate_start) / steps)

        self.pos += 1.0 / TICKS_PER_BEAT
        self.value = self.value * self.dv
        #------------------------------------------------------------------------
        # subtract 
        #------------------------------------------------------------------------
        rv = math.log(rv, 2)
        print("warp: %f" % rv)
        return rv
def deviation_score(percentage, lower_bound, upper_bound):
    if percentage < lower_bound:
        return math.log(lower_bound - percentage, lower_bound) * 100
    elif percentage > upper_bound:
        return math.log(percentage - upper_bound, 100 - upper_bound) * 100
    else:
        return 0
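The base of each logarithm above is the width of the region outside the bound on that side, so the score runs from 0 (inside the range) toward 100 as the deviation approaches that full width. A quick illustration with made-up bounds:

print(deviation_score(50, 20, 80))    # 0: inside the range
print(deviation_score(90, 20, 80))    # log(10, 20) * 100 ~= 76.9
print(deviation_score(0, 20, 80))     # log(20, 20) * 100 == 100.0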
def solve(Xs_info, C, aa, f_cost, epsilon):
    """
    Args:
        Xs_info: map from distribution to tuple of type (probe cost, p_ij)
        C: the cost budget
        aa: the values taken on by the distribution
        epsilon: the epsilon 
    """
    # Compute interval lengths. Note that I_i = [a_j, a_{j + 1}].
    assert len(set(aa)) == len(aa), \
        'The values taken on by the distribution must be distinct'
    assert all(itertools.imap(lambda a: a >= 0, aa)), \
        'The values taken on by the distribution must be nonnegative'

    
    ls = [t - s for s, t in zip(aa, aa[1:])]
    cs, ps = zip(*Xs_info.itervalues())
    z, ys, problem = lp(cs, C, ls, ps)

    print problem

    problem.solve()

    print 'z = {}, ys = {}'.format(z.value(), [y.value() for y in ys])

    # The subset S corresponding to i s.t. y_i = 1 is feasible
    s0 = [x for x, y in zip(Xs_info.iterkeys(), ys) if y.value() == 1]
    # new cost budget C(log log m + log 1/e)
    m = len(aa)
    C_relaxed = C * (math.log(math.log(m)) + math.log(1.0 / epsilon))

    X_cost = {k: c for k, (c, _) in Xs_info.iteritems()}
    return minimum_element.minimum_element(X_cost, C_relaxed, f_cost, S_0=s0)
Example #31
        if tf_doc[item] > 0:
            n[item] = n[item] + 1
    TF.append(tf_doc)

TFN = []

for TFj in TF:
    #for each term TFij
    TFj = [TFij / float(max(TFj)) for TFij in TFj]
    TFN.append(TFj)

N = len(corpus)

IDF = []
for termi in range(len(terms)):
    IDF.append(math.log(No_of_Documents / float(n[termi]), 2))

TFNIDF = []
for TFNj in TFN:
    TFNjIDF = []
    for termIDX in range(len(terms)):
        TFNjIDF.append(TFNj[termIDX] * IDF[termIDX])
    TFNIDF.append(TFNjIDF)

TFNIDF_Matrix = np.matrix(TFNIDF)

# Column Max

C_Max = [TFNIDF_Matrix.max(axis=0).item(i) for i in range(0, len(terms))]
#Column Min
C_Min = [TFNIDF_Matrix.min(axis=0).item(i) for i in range(0, len(terms))]
Example #32
def expran(h):
    """
    Return a random variate drawn from the exponential distribution
    with hazard h and mean 1/h.
    """
    return (-log(1.0 - random()) / h)
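This is inverse-transform sampling: if U is uniform on (0, 1), then -log(1 - U) / h is exponential with rate h. A quick empirical check, assuming log and random are the math.log and random.random used by the surrounding module:

h = 2.0
samples = [expran(h) for _ in range(100000)]
print(sum(samples) / len(samples))   # should be close to the mean 1/h = 0.5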
Example #33
def bnldev(n, pp):
    """
    Return a random deviate drawn from the Binomial distribution
    with parameters n (a positive integer) and pp (a probability).
    """

    # This code was translated from Numerical Recipes in C

    global nold, pold, pc, plog, pclog, en, oldg

    if pp <= 0.5:
        p = pp
    else:
        p = 1.0 - pp

    am = n * p

    if n < 25:
        # Direct method
        bnl = 0.0
        for j in xrange(n):
            if random() < p:
                bnl += 1.0

    elif am < 1.0:
        # Poisson method
        g = exp(-am)
        t = 1.0
        for j in xrange(n):
            t *= random()
            if t < g:
                break
        bnl = j

    else:
        # rejection method
        if n != nold:
            en = n
            oldg = gammln(en + 1.0)
            nold = n

        if p != pold:
            pc = 1.0 - p
            plog = log(p)
            pclog = log(pc)
            pold = p

        sq = sqrt(1.0 * am * pc)

        while True:
            while True:
                angle = pi * random()
                y = tan(angle)
                em = sq * y + am
                if em >= 0 and em < (en + 1.0):
                    break
            em = floor(em)
            t = 1.2*sq*(1.0+y*y)*exp(oldg-gammln(em+1.0) \
                                     - gammln(en-em+1.0) \
                                     + em*plog + (en-em)*pclog)
            if random() < t:
                break
        bnl = em

    if p != pp:
        bnl = n - bnl

    return int(round(bnl))
Example #34
def get_multiplicity(nmin=2, FDR=0.05):
    p_star_dict = {}
    G_score_list = []

    gene_by_pop_dict = {}
    for strain in strains:

        sites_to_remove = get_sites_to_remove(strain)
        gene_count_dict = {}
        if strain == 'minimal':
            dirs = [
                'syn3B_minimal/mm13', 'syn3B_minimal/mm11',
                'syn3B_minimal/mm10', 'syn3B_minimal/mm9'
            ]
            ref_path = mt.get_path(
            ) + '/data/syn3B_minimal/reference/Synthetic.bacterium_JCVI-Syn3A.gb'
        elif strain == 'wildtype':
            dirs = [
                'syn1.0_wildtype/mm6', 'syn1.0_wildtype/mm4',
                'syn1.0_wildtype/mm3', 'syn1.0_wildtype/mm1'
            ]
            ref_path = mt.get_path(
            ) + '/data/syn1.0_wildtype/reference/Synthetic.Mycoplasma.mycoides.JCVI-syn1.0_CP002027.1.gb'
        effective_gene_lengths, effective_gene_lengths_syn, Lsyn, Lnon, substitution_specific_synonymous_fraction = mt.calculate_synonymous_nonsynonymous_target_sizes(
            ref_path)
        for dir in dirs:
            for i, line in enumerate(
                    open(mt.get_path() + '/data/' + dir + '/annotated.gd',
                         'r')):
                line_split = line.strip().split('\t')
                if line_split[0] not in output_to_keep:
                    continue
                if line_split[3] + '_' + line_split[4] in sites_to_remove:
                    continue
                frequency = float([s for s in line_split
                                   if 'frequency=' in s][0].split('=')[1])
                if frequency != 1:
                    continue
                if line_split[0] == 'SNP':
                    if [s for s in line_split if 'snp_type=' in s
                        ][0].split('=')[1] == 'nonsynonymous':
                        locus_tag = [
                            s for s in line_split if 'locus_tag=' in s
                        ][0].split('=')[1]
                        frequency = float([
                            s for s in line_split if 'frequency=' in s
                        ][0].split('=')[1])
                        if ';' in locus_tag:
                            for locus_tag_j in locus_tag.split(';'):
                                if locus_tag_j not in gene_count_dict:
                                    gene_count_dict[locus_tag_j] = 0
                                gene_count_dict[locus_tag_j] += 1
                        else:
                            if locus_tag not in gene_count_dict:
                                gene_count_dict[locus_tag] = 0
                            gene_count_dict[locus_tag] += 1

                    else:
                        continue
                else:
                    if len(
                        [s for s in line_split if 'gene_position=coding' in s
                         ]) >= 1:
                        locus_tag = [
                            s for s in line_split if 'locus_tag=' in s
                        ][0].split('=')[1]
                        frequency = float([
                            s for s in line_split if 'frequency=' in s
                        ][0].split('=')[1])
                        if ';' in locus_tag:
                            for locus_tag_j in locus_tag.split(';'):
                                if locus_tag_j not in gene_count_dict:
                                    gene_count_dict[locus_tag_j] = 0
                                gene_count_dict[locus_tag_j] += 1

                        else:
                            if locus_tag not in gene_count_dict:
                                gene_count_dict[locus_tag] = 0
                            gene_count_dict[locus_tag] += 1

        # get multiplicity scores
        gene_parallelism_statistics = {}
        for gene_i, length_i in effective_gene_lengths.items():
            gene_parallelism_statistics[gene_i] = {}
            gene_parallelism_statistics[gene_i]['length'] = length_i
            gene_parallelism_statistics[gene_i]['observed'] = 0
            gene_parallelism_statistics[gene_i]['multiplicity'] = 0

        # save number of mutations for multiplicity
        for locus_tag_i, n_muts_i in gene_count_dict.items():
            gene_parallelism_statistics[locus_tag_i]['observed'] = n_muts_i

        L_mean = np.mean(list(effective_gene_lengths.values()))
        L_tot = sum(list(effective_gene_lengths.values()))
        n_tot = sum(gene_count_dict.values())
        # don't include taxa with less than 20 mutations
        print("N_total = " + str(n_tot))
        # go back over and calculate multiplicity
        for locus_tag_i in gene_parallelism_statistics.keys():
            # double check the measurements from this
            gene_parallelism_statistics[locus_tag_i][
                'multiplicity'] = gene_parallelism_statistics[locus_tag_i][
                    'observed'] * 1.0 / effective_gene_lengths[
                        locus_tag_i] * L_mean
            gene_parallelism_statistics[locus_tag_i][
                'expected'] = n_tot * gene_parallelism_statistics[locus_tag_i][
                    'length'] / L_tot

        pooled_multiplicities = np.array([
            gene_parallelism_statistics[gene_name]['multiplicity']
            for gene_name in gene_parallelism_statistics.keys()
            if gene_parallelism_statistics[gene_name]['multiplicity'] >= 1
        ])
        pooled_multiplicities.sort()

        pooled_tupe_multiplicities = np.array([
            (gene_parallelism_statistics[gene_name]['multiplicity'],
             gene_parallelism_statistics[gene_name]['observed'])
            for gene_name in gene_parallelism_statistics.keys()
            if gene_parallelism_statistics[gene_name]['multiplicity'] >= 1
        ])
        pooled_tupe_multiplicities = sorted(pooled_tupe_multiplicities,
                                            key=lambda x: x[0])
        pooled_tupe_multiplicities_x = [
            i[0] for i in pooled_tupe_multiplicities
        ]
        pooled_tupe_multiplicities_y = [
            i[1] for i in pooled_tupe_multiplicities
        ]
        pooled_tupe_multiplicities_y = [
            sum(pooled_tupe_multiplicities_y[i:]) /
            sum(pooled_tupe_multiplicities_y)
            for i in range(len(pooled_tupe_multiplicities_y))
        ]

        null_multiplicity_survival = mt.NullGeneMultiplicitySurvivalFunction.from_parallelism_statistics(
            gene_parallelism_statistics)
        null_multiplicity_survival_copy = null_multiplicity_survival(
            pooled_multiplicities)
        null_multiplicity_survival_copy = [
            sum(null_multiplicity_survival_copy[i:]) /
            sum(null_multiplicity_survival_copy)
            for i in range(len(null_multiplicity_survival_copy))
        ]
        #threshold_idx = numpy.nonzero((null_multiplicity_survival(observed_ms)*1.0/observed_multiplicity_survival)<FDR)[0][0]
        mult_survival_dict = {
            'Mult': pooled_multiplicities,
            'Obs_fract': pooled_tupe_multiplicities_y,
            'Null_fract': null_multiplicity_survival_copy
        }
        mult_survival_df = pd.DataFrame(mult_survival_dict)
        mult_survival_df_out = mt.get_path(
        ) + '/data/mult_survival_curves_' + strain + '.txt'
        mult_survival_df.to_csv(mult_survival_df_out, sep='\t', index=True)

        # get likelihood score and null test
        observed_G, pvalue = mt.calculate_total_parallelism(
            gene_parallelism_statistics)
        G_score_list.append((strain, observed_G, pvalue))
        print(strain, observed_G, pvalue)

        # Give each gene a p-value, get distribution
        gene_logpvalues = mt.calculate_parallelism_logpvalues(
            gene_parallelism_statistics)
        pooled_pvalues = []
        for gene_name in gene_logpvalues.keys():
            if (gene_parallelism_statistics[gene_name]['observed'] >=
                    nmin) and (float(gene_logpvalues[gene_name]) >= 0):
                pooled_pvalues.append(gene_logpvalues[gene_name])

        pooled_pvalues = np.array(pooled_pvalues)
        pooled_pvalues.sort()
        if len(pooled_pvalues) == 0:
            continue

        null_pvalue_survival = mt.NullGeneLogpSurvivalFunction.from_parallelism_statistics(
            gene_parallelism_statistics, nmin=nmin)
        observed_ps, observed_pvalue_survival = mt.calculate_unnormalized_survival_from_vector(
            pooled_pvalues, min_x=-4)
        # Pvalue version
        # remove negative minus log p values.
        neg_p_idx = np.where(observed_ps >= 0)
        observed_ps_copy = observed_ps[neg_p_idx]
        observed_pvalue_survival_copy = observed_pvalue_survival[neg_p_idx]
        pvalue_pass_threshold = np.nonzero(
            null_pvalue_survival(observed_ps_copy) * 1.0 /
            observed_pvalue_survival_copy < FDR)[0]
        if len(pvalue_pass_threshold) == 0:
            continue
        threshold_idx = pvalue_pass_threshold[0]
        pstar = observed_ps_copy[
            threshold_idx]  # lowest value where this is true
        num_significant = observed_pvalue_survival[threshold_idx]
        # make it log base 10
        logpvalues_dict = {
            'P_value': observed_ps / math.log(10),
            'Obs_num': observed_pvalue_survival,
            'Null_num': null_pvalue_survival(observed_ps)
        }
        logpvalues_df = pd.DataFrame(logpvalues_dict)
        logpvalues_df_out = mt.get_path(
        ) + '/data/logpvalues_' + strain + '.txt'
        logpvalues_df.to_csv(logpvalues_df_out, sep='\t', index=True)

        p_star_dict[strain] = (num_significant, pstar / math.log(10))

        output_mult_gene_filename = mt.get_path(
        ) + '/data/mult_genes_sig_' + strain + '.txt'
        output_mult_gene = open(output_mult_gene_filename, "w")
        output_mult_gene.write(",".join([
            "Gene", "Length", "Observed", "Expected", "Multiplicity",
            "-log10(P)"
        ]))
        for gene_name in sorted(
                gene_parallelism_statistics,
                key=lambda x: gene_parallelism_statistics.get(x)['observed'],
                reverse=True):
            if gene_logpvalues[
                    gene_name] >= pstar and gene_parallelism_statistics[
                        gene_name]['observed'] >= nmin:
                output_mult_gene.write("\n")
                # log base 10 transform the p-values here as well
                output_mult_gene.write(
                    "%s, %0.1f, %d, %0.2f, %0.2f, %g" %
                    (gene_name,
                     gene_parallelism_statistics[gene_name]['length'],
                     gene_parallelism_statistics[gene_name]['observed'],
                     gene_parallelism_statistics[gene_name]['expected'],
                     gene_parallelism_statistics[gene_name]['multiplicity'],
                     abs(gene_logpvalues[gene_name]) / math.log(10)))
        output_mult_gene.close()

    total_parallelism_path = mt.get_path() + '/data/total_parallelism.txt'
    total_parallelism = open(total_parallelism_path, "w")
    total_parallelism.write("\t".join(["Strain", "G_score", "p_value"]))
    for i in range(len(G_score_list)):
        taxon_i = G_score_list[i][0]
        G_score_i = G_score_list[i][1]
        p_value_i = G_score_list[i][2]
        total_parallelism.write("\n")
        total_parallelism.write("\t".join(
            [taxon_i, str(G_score_i), str(p_value_i)]))

    total_parallelism.close()
    with open(mt.get_path() + '/data/p_star.txt', 'wb') as file:
        file.write(
            pickle.dumps(p_star_dict))  # use `pickle.loads` to do the reverse
def calc(x):
    return str(math.log(abs(12*math.sin(int(x)))))
Example #36
def log_uniform(lo, hi, rate):
  log_lo = math.log(lo)
  log_hi = math.log(hi)
  v = log_lo * (1-rate) + log_hi * rate
  return math.exp(v)
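A usage sketch: log_uniform draws a value whose logarithm is linearly interpolated between log(lo) and log(hi), which is handy for sampling hyper-parameters such as learning rates.

import random

samples = [log_uniform(1e-4, 1e-1, random.random()) for _ in range(5)]
print(samples)                         # every value lies in [1e-4, 1e-1]
print(log_uniform(1e-4, 1e-1, 0.5))    # geometric midpoint, ~3.16e-3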
 def log_cluster_assign_score(self, cluster_id):
     current_cluster_size = self.suffstats[cluster_id].num_pts
     return log(current_cluster_size +
                float(self.alpha) / self.num_clusters)
Example #38
# http://infohost.nmt.edu/~es421/pascal/list12.pas
# PROGRAM Tlog
# test ln and exp
# From Borland Pascal Programs for Scientists and Engineers
# by Alan R. Miller, Copyright C 1993, SYBEX Inc

import math

X = 1.0E-4 / 0.3
for I in range(1, 11):
    Y = math.log(X)
    print ' X =', X, ', Exp(Ln) =', math.exp(Y)
    X = 0.5 * X
Example #39
    error = 0
    for i in zip(case2, res_f):
        ea_err = i[0] - i[1]
        error += copysign(1, ea_err) * (ea_err**2)
    error = error / sum(case2)
    if error < 1e5 and error > -1e5:
        print('case2')
    elif error > 0:
        print('case1')
    else:
        print('case3')


# Slide
master_brute_force(lambda n: n, 4, 2)
master_brute_force(lambda n: n * log(n, 10), 2, 2)
master_brute_force(lambda n: n * log(n, 10), 1, 3)
master_brute_force(lambda n: n**2, 8, 2)
master_brute_force(lambda n: n**3, 9, 3)
master_brute_force(lambda n: 1, 1, 2)
master_brute_force(lambda n: log(n, 10), 2, 2)
print()
# HW
master_brute_force(lambda n: n**2, 5, 2)
master_brute_force(lambda n: n**1.5, 5, 2)
master_brute_force(lambda n: (n**2) * log(n, 2), 10, 10, base=2)
print()

# Exam
master_brute_force(lambda n: n**2 * log(n, 10), 4, 2)
master_brute_force(lambda n: 1, 3, 2)
Example #40
def hurstExponent(x, d=50):
    # Find such a natural number OptN that possesses the largest number of
    # divisors among all natural numbers in the interval [0.99*N,N]
    dmin, N, N0 = d, x.shape[0], math.floor(0.99 * x.shape[0])
    dv = np.zeros((N - N0 + 1, ))
    for i in range(N0, N + 1):
        dv[i - N0] = divGtN0(i, dmin).shape[0]
    optN = N0 + np.max(np.arange(0, N - N0 + 1)[max(dv) == dv])
    # Use the first OptN values of x for further analysis
    x = x[:optN]
    d = divGtN0(optN, dmin)

    N = d.shape[0]
    RSe, ERS = np.zeros((N, )), np.zeros((N, ))

    # Calculate empirical R/S
    for i in range(N):
        RSe[i] = rscalc(x, d[i])

    # Compute Anis-Lloyd [1] and Peters [3] corrected theoretical E(R/S)
    # (see [4] for details)
    for i in range(N):
        n = d[i]
        K = np.arange(1, n)
        ratio = (n - 0.5) / n * np.sum(np.sqrt((np.ones((n - 1)) * n - K) / K))
        if n > 340:
            ERS[i] = ratio / math.sqrt(0.5 * math.pi * n)
        else:
            ERS[i] = (math.gamma(0.5 * (n - 1)) *
                      ratio) / (math.gamma(0.5 * n) * math.sqrt(math.pi))

    # Calculate the Anis-Lloyd/Peters corrected Hurst exponent
    # Compute the Hurst exponent as the slope on a loglog scale
    ERSal = np.sqrt(0.5 * math.pi * d)
    Pal = np.polyfit(np.log10(d), np.log10(RSe - ERS + ERSal), 1)
    Hal = Pal[0]

    # Calculate the empirical and theoretical Hurst exponents
    Pe = np.polyfit(np.log10(d), np.log10(RSe), 1)
    He = Pe[0]
    P = np.polyfit(np.log10(d), np.log10(ERS), 1)
    Ht = P[0]

    # Compute empirical confidence intervals (see [4])
    L = math.log2(optN)
    # R/S-AL (min(divisor)>50) two-sided empirical confidence intervals
    #pval95 = np.array([0.5-exp(-7.33*log(log(L))+4.21) exp(-7.20*log(log(L))+4.04)+0.5])
    lnlnL = math.log(math.log(L))
    c1 = [
        0.5 - math.exp(-7.35 * lnlnL + 4.06),
        math.exp(-7.07 * lnlnL + 3.75) + 0.5, 0.90
    ]
    c2 = [
        0.5 - math.exp(-7.33 * lnlnL + 4.21),
        math.exp(-7.20 * lnlnL + 4.04) + 0.5, 0.95
    ]
    c3 = [
        0.5 - math.exp(-7.19 * lnlnL + 4.34),
        math.exp(-7.51 * lnlnL + 4.58) + 0.5, 0.99
    ]
    C = np.array([c1, c2, c3])

    detail = (d, optN, RSe, ERS, ERSal)
    return (Hal, He, Ht, C, detail)
xg_reg.fit(x2,y2)

final_portfolio=pd.DataFrame(columns=['ID','ln_LR'])
error=[]
for x in range(1,601):
    try:
        df_test=pd.read_csv('test_portfolio_cleaned17nov_'+str(x)+'.csv')
        #df_test=df_test.drop(['Unnamed: 0'], axis=1)
        x_test=df_test.values[:,:30]
        preds=model.predict(x_test)
        df_test['Loss_or_Not']=preds
        df_test_reg=df_test.loc[df_test['Loss_or_Not']==1]
        x_test_reg=df_test_reg.values[:,:30]
        preds_reg = xg_reg.predict(x_test_reg)
        df_test_reg['Loss']=preds_reg
        df_test=df_test.loc[df_test['Loss_or_Not']==0]
        df_test['Loss']=0
        df_test=df_test.append(df_test_reg)
        #df_test['Annual_Premium'].sum()
        #df_test['Loss'].sum()
        
        loss_ratio=df_test['Loss'].sum()/df_test['Annual_Premium'].sum()
        #math.log(loss_ratio)

        final_portfolio=final_portfolio.append({'ID':'portfolio_'+str(x), 'ln_LR':math.log(loss_ratio)},ignore_index=True)
    except:
        error.append(x)
        pass
    
final_portfolio.to_csv('result.csv',index=False)
Example #42
def getNDCG(ranklist, gtItem):
    for i in range(len(ranklist)):
        item = ranklist[i]
        if item == gtItem:
            return math.log(2) / math.log(i+2)
    return 0
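Here math.log(2) / math.log(i + 2) is simply 1 / log2(i + 2), the standard DCG position discount for a single relevant item found at 0-based rank i; a small check with made-up item ids:

import math

ranklist = [10, 42, 7]
print(getNDCG(ranklist, 42))        # found at index 1 -> 1 / log2(3) ~= 0.6309
print(1.0 / math.log2(1 + 2))       # same value
print(getNDCG(ranklist, 99))        # not in the list -> 0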
Example #43
 def MQGetPercentage(self, rs_ro_ratio, pcurve):
     return (math.pow(10,( ((math.log(rs_ro_ratio)-pcurve[1])/ pcurve[2]) + pcurve[0])))
Example #44
def corpus_bleu(
    list_of_references,
    hypotheses,
    weights=(0.25, 0.25, 0.25, 0.25),
    smoothing_function=None,
    auto_reweigh=False,
):
    """
    Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
    the hypotheses and their respective references.

    Instead of averaging the sentence level BLEU scores (i.e. macro-average
    precision), the original BLEU metric (Papineni et al. 2002) accounts for
    the micro-average precision (i.e. summing the numerators and denominators
    for each hypothesis-reference(s) pairs before the division).

    >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
    ...         'ensures', 'that', 'the', 'military', 'always',
    ...         'obeys', 'the', 'commands', 'of', 'the', 'party']
    >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
    ...          'ensures', 'that', 'the', 'military', 'will', 'forever',
    ...          'heed', 'Party', 'commands']
    >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
    ...          'guarantees', 'the', 'military', 'forces', 'always',
    ...          'being', 'under', 'the', 'command', 'of', 'the', 'Party']
    >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
    ...          'army', 'always', 'to', 'heed', 'the', 'directions',
    ...          'of', 'the', 'party']

    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
    ...         'interested', 'in', 'world', 'history']
    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
    ...          'because', 'he', 'read', 'the', 'book']

    >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
    >>> hypotheses = [hyp1, hyp2]
    >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
    0.5920...

    The example below shows that corpus_bleu() is different from averaging
    sentence_bleu() for hypotheses

    >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
    >>> score2 = sentence_bleu([ref2a], hyp2)
    >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
    0.6223...

    :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
    :type list_of_references: list(list(list(str)))
    :param hypotheses: a list of hypothesis sentences
    :type hypotheses: list(list(str))
    :param weights: weights for unigrams, bigrams, trigrams and so on
    :type weights: list(float)
    :param smoothing_function:
    :type smoothing_function: SmoothingFunction
    :param auto_reweigh: Option to re-normalize the weights uniformly.
    :type auto_reweigh: bool
    :return: The corpus-level BLEU score.
    :rtype: float
    """
    # Before proceeding to compute BLEU, perform sanity checks.

    p_numerators = Counter()  # Key = ngram order, and value = no. of ngram matches.
    p_denominators = Counter()  # Key = ngram order, and value = no. of ngram in ref.
    hyp_lengths, ref_lengths = 0, 0

    assert len(list_of_references) == len(hypotheses), (
        "The number of hypotheses and their reference(s) should be the " "same "
    )

    # Iterate through each hypothesis and their corresponding references.
    for references, hypothesis in zip(list_of_references, hypotheses):
        # For each order of ngram, calculate the numerator and
        # denominator for the corpus-level modified precision.
        for i, _ in enumerate(weights, start=1):
            p_i = modified_precision(references, hypothesis, i)
            p_numerators[i] += p_i.numerator
            p_denominators[i] += p_i.denominator

        # Calculate the hypothesis length and the closest reference length.
        # Adds them to the corpus-level hypothesis and reference counts.
        hyp_len = len(hypothesis)
        hyp_lengths += hyp_len
        ref_lengths += closest_ref_length(references, hyp_len)

    # Calculate corpus-level brevity penalty.
    bp = brevity_penalty(ref_lengths, hyp_lengths)

    # Uniformly re-weighting based on maximum hypothesis lengths if largest
    # order of n-grams < 4 and weights is set at default.
    if auto_reweigh:
        if hyp_lengths < 4 and weights == (0.25, 0.25, 0.25, 0.25):
            weights = (1 / hyp_lengths,) * hyp_lengths

    # Collects the various precision values for the different ngram orders.
    p_n = [
        Fraction(p_numerators[i], p_denominators[i], _normalize=False)
        for i, _ in enumerate(weights, start=1)
    ]

    # Returns 0 if there's no matching n-grams
    # We only need to check for p_numerators[1] == 0, since if there's
    # no unigrams, there won't be any higher order ngrams.
    if p_numerators[1] == 0:
        return 0

    # If there's no smoothing, use method0 from the SmoothingFunction class.
    if not smoothing_function:
        smoothing_function = SmoothingFunction().method0
    # Smoothen the modified precision.
    # Note: smoothing_function() may convert values into floats;
    #       it tries to retain the Fraction object as much as the
    #       smoothing method allows.
    p_n = smoothing_function(
        p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths
    )
    s = (w_i * math.log(p_i) for w_i, p_i in zip(weights, p_n))
    s = bp * math.exp(math.fsum(s))
    return s
Example #45
from math import log
from src.prune_seqs import prune_seqs
from src.fmt_read_id import fmt_read_id
from src.filesystem import OPEN_FUNCS, FORMATTING_FUNCS, is_gzipped
from src.printlog import printlog_warning

FASTQ_LINES_PER_READ = 4

# Function for getting Q value from Phred33 character:
substr_phred33 = lambda q_symb: ord(q_symb) - 33
# List of probabilities corresponding to indices (index is Q, value is the probability):
q2p_map = [10 ** (-q/10) for q in range(128)] # 127 -- max value of a signed byte
# Function for accessing probabilities by Q:
qual2prop = lambda q: q2p_map[q]
# Function for accessing Q by probability:
prop2qual = lambda p: round(-10 * log(p, 10), 2)


def get_read_avg_qual(qual_str):
    # Function calculates mean quality of a single read.
    # :param qual_str: read's quality line in Phred33;
    # :type qual_str: str;

    quals = map(substr_phred33, qual_str) # get Qs
    err_props = map(qual2prop, quals) # convert Qs to probabilities
    avg_err_prop = sum(err_props) / len(qual_str) # calculate average probability
    return prop2qual(avg_err_prop)
# end def get_read_avg_qual
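A small round-trip check of the two lambdas defined above: a Phred quality of 30 corresponds to an error probability of 0.001, and converting back recovers 30.

q = 30
print(qual2prop(q))               # 0.001
print(prop2qual(qual2prop(q)))    # 30.0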


def form_packet_numseqs(fastq_file, packet_size, fmt_func, max_seq_len):
Example #46
#args3['str-init'] = [-40.0]
#args3[('str-init', 'reset-optimizer', 'memory-efficient-fp16')] = [(-10.0, True, True)]
args3['str-init'] = [-10.0, -7.5]
args3['str-sparse'] = [False]
args3['str-lr'] = [0.00, 0.001]
args3['str-mult-offset'] = [0.01]
args3[('str-noisy-relu', 'str-weight-noise')] = [(0.1, 0.0), (0.2, 0.0), (0.3, 0.0)]

args3[('max-update', 'warmup-updates', '')] = [(16000, 3000, ' /private/home/timdettmers/data/cc_small')]

args3['weight-decay'] = [0.00]

key = ('lr', 'warmup-init-lr')
args3[key] = []
for params in [1e5]:
    lr = 0.003239 + (-0.0001395*math.log(params))
    args3[key].append((lr, lr*0.1))
args4 = []

args5 = {}

args6 = {}

rdm = np.random.RandomState(5345)

for key, value in args2.items():
    cmd = cmd + ' --{0} {1}'.format(key, value)

args_prod = []
for key, values in args3.items():
    if isinstance(key, tuple):
Example #47
def log(o):
    if hasattr(o, "__log__"): return o.__log__()
    return math.log(o)
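A usage sketch with a hypothetical class: the wrapper dispatches to an object's own __log__ method when it exists and falls back to math.log otherwise.

import math

class DbValue:
    """Hypothetical example object providing its own __log__ hook."""
    def __init__(self, value):
        self.value = value
    def __log__(self):
        return math.log(self.value)

print(log(DbValue(math.e)))   # dispatches to __log__ -> 1.0
print(log(math.e))            # falls back to math.log -> 1.0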
Example #48
def b_spline_nurbs(x, y, z, fname_centerline=None, degree=3, point_number=3000, nbControl=-1, verbose=1,
                   all_slices=True, path_qc='.'):
    """
    3D B-Spline function
    :param x:
    :param y:
    :param z:
    :param fname_centerline:
    :param degree:
    :param point_number:
    :param nbControl:
    :param verbose:
    :param all_slices:
    :param path_qc:
    :return:
    """
    from math import log

    twodim = False
    if z is None:
        twodim = True

    """x.reverse()
    y.reverse()
    z.reverse()"""

    logger.info('Fitting centerline using B-spline approximation')
    if not twodim:
        data = [[x[n], y[n], z[n]] for n in range(len(x))]
    else:
        data = [[x[n], y[n]] for n in range(len(x))]

    # if control_points == 0:
    #     nurbs = NURBS(degree, point_number, data) # BE very careful with the spline order that you choose : if order is too high ( > 4 or 5) you need to set a higher number of Control Points (cf sct_nurbs ). For the third argument (number of points), give at least len(z_centerline)+500 or higher
    # else:
    #     sct.printv('In b_spline_nurbs we get control_point = ', control_points)
    #     nurbs = NURBS(degree, point_number, data, False, control_points)

    if nbControl == -1:
        centerlineSize = getSize(x, y, z, fname_centerline)
        nbControl = 30 * log(centerlineSize, 10) - 42
        nbControl = np.round(nbControl)
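        # Hedged worked example (not in the original): the heuristic above uses
        # 30 * log10(centerline size) - 42 control points, so a centerline of
        # size 100 gives round(30 * 2 - 42) = 18 control points.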

    nurbs = NURBS(degree, point_number, data, False, nbControl, verbose, all_slices=all_slices, twodim=twodim)

    if not twodim:
        P = nurbs.getCourbe3D()
        x_fit = P[0]
        y_fit = P[1]
        z_fit = P[2]
        Q = nurbs.getCourbe3D_deriv()
        x_deriv = Q[0]
        y_deriv = Q[1]
        z_deriv = Q[2]
    else:
        P = nurbs.getCourbe2D()
        x_fit = P[0]
        y_fit = P[1]
        Q = nurbs.getCourbe2D_deriv()
        x_deriv = Q[0]
        y_deriv = Q[1]

    """x_fit = x_fit[::-1]
    y_fit = x_fit[::-1]
    z_fit = x_fit[::-1]
    x_deriv = x_fit[::-1]
    y_deriv = x_fit[::-1]
    z_deriv = x_fit[::-1]"""

    if verbose == 2:
        # TODO qc
        PC = nurbs.getControle()
        PC_x = [p[0] for p in PC]
        PC_y = [p[1] for p in PC]
        if not twodim:
            PC_z = [p[2] for p in PC]

        import matplotlib
        matplotlib.use('Agg')  # prevent display figure
        import matplotlib.pyplot as plt
        if not twodim:
            plt.figure(1)
            #ax = plt.subplot(211)
            plt.subplot(211)
            plt.plot(z, x, 'r.')
            plt.plot(z_fit, x_fit)
            plt.plot(PC_z, PC_x, 'go')
            # ax.set_aspect('equal')
            plt.xlabel('z')
            plt.ylabel('x')
            plt.legend(["centerline", "NURBS", "control points"])
            #ay = plt.subplot(212)
            plt.subplot(212)
            plt.plot(z, y, 'r.')
            plt.plot(z_fit, y_fit)
            plt.plot(PC_z, PC_y, 'go')
            # ay.set_aspect('equal')
            plt.xlabel('z')
            plt.ylabel('y')
            plt.legend(["centerline", "NURBS", "control points"],loc=4)
            # plt.show()
        else:
            plt.figure(1)
            plt.plot(y, x, 'r.')
            plt.plot(y_fit, x_fit)
            plt.plot(PC_y, PC_x, 'go')
            # ax.set_aspect('equal')
            plt.xlabel('y')
            plt.ylabel('x')
            plt.legend(["centerline", "NURBS", "control points"])
            # plt.show()
        plt.savefig(os.path.join(path_qc, 'fig_b_spline_nurbs.png'))
        plt.close()

    if not twodim:
        return x_fit, y_fit, z_fit, x_deriv, y_deriv, z_deriv, nurbs.error_curve_that_last_worked
    else:
        return x_fit, y_fit, x_deriv, y_deriv, nurbs.error_curve_that_last_worked
Ejemplo n.º 49
0
    n = int(input("Digite a quantidade de números: "))
    sn = 0
    i = 0
    while i < n:
        s = float(input("Digite um número: "))
        sn += s
        i = i + 1
    M = sn / n
    print("A média dos números digitados é igual a {}".format(M))

if tipoCalculo == 9:
    print("---LOG---")
    b = float(input("Digite a base do log: "))
    x = float(input("Digite um número: "))

    lg = math.log(x, b)
    print("O log de {} na base {} é igual a {}".format(x, b, lg))

if tipoCalculo == 10:
    print("---SISTEMAS LINEARES---")
    A1 = int(input("Digite o valor que acompanha o 1º X: "))
    B1 = int(input("Digite o valor que acompanha o 1º Y: "))
    A2 = int(input("Digite o valor que acompanha o 2º X: "))
    B2 = int(input("Digite o valor que acompanha o 2º Y: "))
    C1 = int(input("Digite o valor que é resultante da 1ª equação: "))
    C2 = int(input("Digite o valor que é resultante da 2ª equação: "))

    import numpy as np

    A = np.array([[A1, B1], [A2, B2]])
    B = np.array([[C1], [C2]])
Ejemplo n.º 50
0
    def NOISE_FREE_RES(self, SPS=None, vref=2.048):
        # check operating mode
        current_op_mode = self.OPERATING_MODE()

        # check sample rate and translate to list location using the dictionary
        normal_sps_translate = {
            20: 0,
            45: 1,
            90: 2,
            175: 3,
            330: 4,
            600: 5,
            1000: 6
        }
        turbo_sps_translate = {
            40: 0,
            90: 1,
            180: 2,
            350: 3,
            660: 4,
            1200: 5,
            2000: 6
        }
        current_sps = self.DATA_RATE()
        if current_op_mode == 'turbo':
            translated_sps = turbo_sps_translate[current_sps]
        if current_op_mode == 'normal':
            translated_sps = normal_sps_translate[current_sps]

        # check gain and translate it to list location
        current_gn = self.PGA()
        translated_gain = int(math.log(current_gn) / math.log(2))

        # check pga status
        current_pga_status = self.PGA_ENABLED()

        # Set correct reference voltage for calculations
        # Check vref or use manual if provided
        if vref != -1:
            current_ref_voltage = vref
        else:
            current_ref = self.VOLTAGE_REF()
            if current_ref == 'internal':
                current_ref_voltage = 2.048
            elif current_ref == 'analog_supply':
                current_ref_voltage = 3.3

        # Select appropriate noise for current states
        # If the pga is disabled
        if current_pga_status == 0:
            # If turbo mode is enabled
            if current_op_mode == 'turbo':
                noise = CONST._TURBO_NOISE_PGA_DISABLED[translated_sps][
                    translated_gain][1] * (10**(-6))
            # If normal mode is enabled
            else:
                noise = CONST._NORMAL_NOISE_PGA_DISABLED[translated_sps][
                    translated_gain][1] * (10**(-6))
        # If the pga is enabled
        else:
            # If turbo mode is enabled
            if current_op_mode == 'turbo':
                noise = CONST._TURBO_NOISE[translated_sps][translated_gain][1]
            # If normal mode is enabled
            else:
                noise = CONST._NORMAL_NOISE[translated_sps][translated_gain][1]

        # Calculate and return the current effective resolution (uV RMS)
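        # Hedged worked example (not in the original): with vref = 2.048 V,
        # gain = 1 and roughly 1 uV RMS of noise, the noise-free resolution is
        # log2(2 * 2.048 / 1e-6) = log2(4.096e6) ~= 21.97 bits.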
        res = math.log(
            (2 * current_ref_voltage) / (current_gn * noise)) / math.log(2)
        return res
Ejemplo n.º 51
0
    def get_target(self, target, anchors, in_w, in_h, ignore_threshold):
        bs = len(target)
        anchor_index = [[0,1,2],[3,4,5],[6,7,8]][self.feature_length.index(in_w)]
        subtract_index = [0,3,6][self.feature_length.index(in_w)]
        mask = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        noobj_mask = torch.ones(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)

        tx = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        ty = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        tw = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        th = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        tconf = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        tcls = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, self.num_classes, requires_grad=False)

        box_loss_scale_x = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        box_loss_scale_y = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False)
        for b in range(bs):            
            if len(target[b])==0:
                continue
            gxs = target[b][:, 0:1] * in_w
            gys = target[b][:, 1:2] * in_h
            
            gws = target[b][:, 2:3] * in_w
            ghs = target[b][:, 3:4] * in_h

            gis = torch.floor(gxs)
            gjs = torch.floor(gys)

            gt_box = torch.FloatTensor(torch.cat([torch.zeros_like(gws), torch.zeros_like(ghs), gws, ghs], 1))

            anchor_shapes = torch.FloatTensor(torch.cat((torch.zeros((self.num_anchors, 2)), torch.FloatTensor(anchors)), 1))

            anch_ious = jaccard(gt_box, anchor_shapes)
            best_ns = torch.argmax(anch_ious,dim=-1)
            for i, best_n in enumerate(best_ns):
                if best_n not in anchor_index:
                    continue

                gi = gis[i].long()
                gj = gjs[i].long()
                gx = gxs[i]
                gy = gys[i]
                gw = gws[i]
                gh = ghs[i]

                if (gj < in_h) and (gi < in_w):
                    best_n = best_n - subtract_index

                    noobj_mask[b, best_n, gj, gi] = 0
                    mask[b, best_n, gj, gi] = 1

                    tx[b, best_n, gj, gi] = gx - gi.float()
                    ty[b, best_n, gj, gi] = gy - gj.float()

                    tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n+subtract_index][0])
                    th[b, best_n, gj, gi] = math.log(gh / anchors[best_n+subtract_index][1])

                    box_loss_scale_x[b, best_n, gj, gi] = target[b][i, 2]
                    box_loss_scale_y[b, best_n, gj, gi] = target[b][i, 3]

                    tconf[b, best_n, gj, gi] = 1

                    tcls[b, best_n, gj, gi, int(target[b][i, 4])] = 1
                else:
                    print('Step {0} out of bound'.format(b))
                    print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(gj, in_h, gi, in_w))
                    continue

        return mask, noobj_mask, tx, ty, tw, th, tconf, tcls, box_loss_scale_x, box_loss_scale_y
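# Hedged note (not in the original): tw/th above follow the standard YOLO box
# encoding, i.e. the targets are log-scale ratios to the matched anchor:
#     tw = log(gw / anchor_w),  th = log(gh / anchor_h)
# so at inference the width is recovered as gw = anchor_w * exp(tw).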
Ejemplo n.º 52
0
def create_feature(data, uni_seen_list, bi_seen_list, tri_seen_list, i_num,
                   doUpdate):
    # unigram process
    uni_old_list = copy.deepcopy(uni_seen_list)
    bi_old_list = copy.deepcopy(bi_seen_list)
    tri_old_list = copy.deepcopy(tri_seen_list)
    uni_old_freq_list = copy.deepcopy(uni_seen_freq_list)
    bi_old_freq_list = copy.deepcopy(bi_seen_freq_list)

    num_uni = len(data)
    num_uni_unseen = 0
    freq_uni_seen = 0
    for uni in data:
        freq_uni_seen += uni_old_freq_list[uni]
        if not uni in uni_old_list:
            num_uni_unseen += 1
            if doUpdate:
                uni_seen_list.append(uni)
        if doUpdate:
            uni_seen_freq_list[uni] += 1
    prop_uni_unseen = num_uni_unseen / num_uni  # proportion of unseen unigram words

    mean_freq_uni = freq_uni_seen / num_uni
    # print (mean_freq_uni)

    # bigram process
    num_bi = len(data) - 1
    num_bi_unseen = 0
    freq_bi_seen = 0
    for i in range(num_bi):
        bi = list(data[i:i + 2])
        freq_bi_seen += bi_old_freq_list[bi[0], bi[1]]
        # print (freq_bi_seen)
        if not bi in bi_old_list:
            num_bi_unseen += 1
            if doUpdate:
                bi_seen_list.append(bi)
        if doUpdate:
            bi_seen_freq_list[bi[0]][bi[1]] += 1
    prop_bi_unseen = num_bi_unseen / num_bi  # proportion of unseen bigram words

    mean_freq_bi = freq_bi_seen / num_bi

    # trigram process
    num_tri = len(data) - 2
    num_tri_unseen = 0
    for i in range(num_tri):
        tri = list(data[i:i + 3])
        if not tri in tri_old_list:
            num_tri_unseen += 1
            if doUpdate:
                tri_seen_list.append(tri)
                # tri_seen_freq_list[tri[0]][tri[1]][tri[2]] += 1
    prop_tri_unseen = num_tri_unseen / num_tri  # proportion of unseen trigram words

    # Frequency
    # print (np.sum(bi_seen_freq_list))

    # create tensor variable
    input_feature = torch.Tensor(
        np.array([
            prop_uni_unseen, prop_bi_unseen, prop_tri_unseen, mean_freq_uni,
            mean_freq_bi,
            math.log(i_num + 1)
        ]))
    input_feature = input_feature.view(-1, 6)

    return input_feature
Ejemplo n.º 53
0
    def _snr(self, frames):
        rms = audioop.rms(b''.join(frames), int(self._input_bits/8))
        if rms > 0 and self._threshold > 0:
            return 20.0 * math.log(rms/self._threshold, 10)
        else:
            return 0
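# Hedged worked example (not in the original): if the RMS level is ten times
# the configured threshold, the method returns 20.0 * log10(10) = 20 dB SNR.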
Ejemplo n.º 54
0
import math
import random

def gen_password(entropy_bits):
    # 'chars' is assumed to be an alphabet defined at module level
    base_entropy = math.log(len(chars)) / math.log(2)
    nchars = int(math.ceil(entropy_bits / base_entropy))  # ceil the quotient, not entropy_bits
    return ''.join([random.choice(chars) for i in range(nchars)])
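# Hedged worked example (not in the original): with a 62-character
# alphanumeric alphabet, log2(62) ~= 5.95 bits per character, so requesting
# 128 bits of entropy yields ceil(128 / 5.95) = 22 characters.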
Ejemplo n.º 55
0
def corpus_bleu(list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25),
                smoothing_function=None):
    """
    Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all 
    the hypotheses and their respective references.  

    Instead of averaging the sentence level BLEU scores (i.e. macro-average
    precision), the original BLEU metric (Papineni et al. 2002) accounts for 
    the micro-average precision (i.e. summing the numerators and denominators
    for each hypothesis-reference(s) pairs before the division).
    
    >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
    ...         'ensures', 'that', 'the', 'military', 'always',
    ...         'obeys', 'the', 'commands', 'of', 'the', 'party']
    >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
    ...          'ensures', 'that', 'the', 'military', 'will', 'forever',
    ...          'heed', 'Party', 'commands']
    >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
    ...          'guarantees', 'the', 'military', 'forces', 'always',
    ...          'being', 'under', 'the', 'command', 'of', 'the', 'Party']
    >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
    ...          'army', 'always', 'to', 'heed', 'the', 'directions',
    ...          'of', 'the', 'party']
    
    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', 
    ...         'interested', 'in', 'world', 'history']
    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', 
    ...          'because', 'he', 'read', 'the', 'book']
    
    >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
    >>> hypotheses = [hyp1, hyp2]
    >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
    0.5520...
    
    The example below shows that corpus_bleu() is different from averaging
    sentence_bleu() for hypotheses 
    
    >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
    >>> score2 = sentence_bleu([ref2a], hyp2)
    >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
    0.6223...
    
    :param references: a corpus of lists of reference sentences, w.r.t. hypotheses
    :type references: list(list(list(str)))
    :param hypotheses: a list of hypothesis sentences
    :type hypotheses: list(list(str))
    :param weights: weights for unigrams, bigrams, trigrams and so on
    :type weights: list(float)
    :return: The corpus-level BLEU score.
    :rtype: float
    """
    p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches.
    p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref.
    hyp_lengths, ref_lengths = 0, 0
    
    assert len(list_of_references) == len(hypotheses), "The number of hypotheses and their reference(s) should be the same"
    
    # Iterate through each hypothesis and their corresponding references.
    for references, hypothesis in zip(list_of_references, hypotheses):
        # For each order of ngram, calculate the numerator and
        # denominator for the corpus-level modified precision.
        for i, _ in enumerate(weights, start=1): 
            p_i = _modified_precision(references, hypothesis, i)
            p_numerators[i] += p_i.numerator
            p_denominators[i] += p_i.denominator
            
        # Calculate the hypothesis length and the closest reference length.
        # Adds them to the corpus-level hypothesis and reference counts.
        hyp_len =  len(hypothesis)
        hyp_lengths += hyp_len
        ref_lengths += _closest_ref_length(references, hyp_len)    
        
    # Calculate corpus-level brevity penalty.
    bp = _brevity_penalty(ref_lengths, hyp_lengths)
    
    # Collects the various precision values for the different ngram orders.
    p_n = [Fraction(p_numerators[i], p_denominators[i]) 
           for i, _ in enumerate(weights, start=1)]
    
    # Smoothen the modified precision.
    # Note: smoothing_function() converts values into float.
    if smoothing_function:
        p_n = smoothing_function(p_n, references=references, 
                                 hypothesis=hypothesis, hyp_len=hyp_len)
        
    # Calculates the overall modified precision for all ngrams.
    # By sum of the product of the weights and the respective *p_n*
    s = (w * math.log(p_i) if p_i else 0 
         for w, p_i in zip(weights, p_n))
        
    return bp * math.exp(math.fsum(s))
Ejemplo n.º 56
0
import math


def get_sum(target, l):
    ind = preIndex.index(target)
    n = 2 ** (level - l) - 2
    result = 0
    for i in range(1, n + 1):
        result += preIndex[ind + i]
    return result


preIndex = [int(i) for i in input().strip().split(" ")]
middleIndex = [int(i) for i in input().strip().split(" ")]
level = int(math.log(len(middleIndex) + 1, 2))
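# Hedged note (not in the original): this assumes the indices describe a
# complete binary tree, where a tree with L levels has 2**L - 1 nodes, hence
# L = log2(n + 1); e.g. 7 nodes -> 3 levels.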
fathers = [[int(len(middleIndex) / 2)]]
for i in range(level - 2):
    newL = []
    differ = 2 ** (level - 2 - i)
    for k in fathers[i]:
        newL.append(k - differ)
        newL.append(k + differ)
    fathers.append(newL)
for i in range(len(fathers)):
    for j in fathers[i]:
        middleIndex[j] = get_sum(middleIndex[j], i)
for i in range(len(middleIndex)):
    if i % 2 == 0:
        middleIndex[i] = 0
print(*middleIndex, end="")
Ejemplo n.º 57
0
def edit_function():
	addr_space = int(g.get_property("addr_space"))
	return int(math.log(addr_space, 2) - math.log(dist(), 2))
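# Hedged worked example (not in the original): log2(addr_space) - log2(dist)
# equals log2(addr_space / dist), so a 2**32 address space with dist() == 2**12
# returns 20.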
Ejemplo n.º 58
0
n = 0
for tag in Pwgt:
    Pwgt[tag][unknown] = .1
for tag in tags:
    if sum(Pwgt[tag].values()) > n:
        mfreq_tag = tag
        n = sum(Pwgt[tag].values())
n = 0
for tag in tags:
    if sum(Ptgt[tag].values()) > n:
        mfreq_taggt = tag
        n = sum(Ptgt[tag].values())  # track the running maximum from the same table (Ptgt), not Pwgt
for tag in Pwgt:
    obs = sum(Pwgt[tag].values())
    for word in Pwgt[tag]:
        Pwgt[tag][word] = math.log(Pwgt[tag][word])-math.log(obs)
for ptag in Ptgt:
    obs = sum(Ptgt[ptag].values())
    for ctag in Ptgt[ptag]:
        Ptgt[ptag][ctag] = math.log(Ptgt[ptag][ctag])-math.log(obs)
start = "START"
end = "END"
keys = list(Ptgt.keys())
keys.remove(start)
keys.append(end)

#First, we make an FSA that utilizes P(T|T)
f = open("ptgt.fst.txt","w")

for tag in Ptgt[start]:
    f.write("{} {} {} {}\n".format(0, keys.index(tag)+1, tag, -Ptgt[start][tag]))
Ejemplo n.º 59
0
from random import randint
from math import log

RandomNumbers = []
RandMaxNumber = int(input('Введите размер массива\n'))
for i in range(1 << int(round(log(RandMaxNumber, 2)))):
    if i <= RandMaxNumber:
        RandomNumbers.append(randint(1, 10000))
    else:
        RandomNumbers.append(0)  # pad with zeros
print('Случайные числа')
print(' '.join(map(str, RandomNumbers)))  # join the numbers, not the characters of the list's repr
print('Количество элементов в списке {}'.format(len(RandomNumbers)))
#print('Отдельное случайное число - {}'.format(randint(1, 10000)))
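# Hedged note (not in the original): 1 << round(log(RandMaxNumber, 2)) rounds
# to the *nearest* power of two, so e.g. RandMaxNumber = 10 yields only 8
# slots; using math.ceil instead of round would guarantee at least
# RandMaxNumber entries before zero-padding.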
Ejemplo n.º 60
0
def sentence_bleu(references, hypothesis, weights=(0.25, 0.25, 0.25, 0.25),
                  smoothing_function=None):
    """
    Calculate BLEU score (Bilingual Evaluation Understudy) from
    Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
    "BLEU: a method for automatic evaluation of machine translation." 
    In Proceedings of ACL. http://www.aclweb.org/anthology/P02-1040.pdf

    >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
    ...               'ensures', 'that', 'the', 'military', 'always',
    ...               'obeys', 'the', 'commands', 'of', 'the', 'party']

    >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
    ...               'forever', 'hearing', 'the', 'activity', 'guidebook',
    ...               'that', 'party', 'direct']

    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
    ...               'ensures', 'that', 'the', 'military', 'will', 'forever',
    ...               'heed', 'Party', 'commands']

    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
    ...               'guarantees', 'the', 'military', 'forces', 'always',
    ...               'being', 'under', 'the', 'command', 'of', 'the',
    ...               'Party']

    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
    ...               'army', 'always', 'to', 'heed', 'the', 'directions',
    ...               'of', 'the', 'party']

    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
    0.5045...

    >>> sentence_bleu([reference1, reference2, reference3], hypothesis2) # doctest: +ELLIPSIS
    0.3969...

    The default BLEU calculates a score for up to 4-grams using uniform
    weights. To evaluate your translations with higher/lower order ngrams,
    use customized weights. E.g. when accounting for up to 5-grams with uniform
    weights:

    >>> weights = (0.1666, 0.1666, 0.1666, 0.1666, 0.1666)
    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights)
    0.45838627164939455
    
    :param references: reference sentences
    :type references: list(list(str))
    :param hypothesis: a hypothesis sentence
    :type hypothesis: list(str)
    :param weights: weights for unigrams, bigrams, trigrams and so on
    :type weights: list(float)
    :return: The sentence-level BLEU score.
    :rtype: float
    """
    # Calculates the brevity penalty.
    # *hyp_len* is referred to as *c* in Papineni et al. (2002)
    hyp_len = len(hypothesis)
    # *closest_ref_len* is referred to as *r* in Papineni et al. (2002)
    closest_ref_len = _closest_ref_length(references, hyp_len)
    bp = _brevity_penalty(closest_ref_len, hyp_len)
    
    # Calculates the modified precision *p_n* for each order of ngram.
    p_n = [_modified_precision(references, hypothesis, i)
            for i, _ in enumerate(weights, start=1)]

    # Smoothen the modified precision.
    # Note: smoothing_function() converts values into float.
    if smoothing_function:
        p_n = smoothing_function(p_n, references=references, 
                                 hypothesis=hypothesis, hyp_len=hyp_len)
    
    # Calculates the overall modified precision for all ngrams.
    # By sum of the product of the weights and the respective *p_n*
    s = (w * math.log(p_i) if p_i else 0 
         for w, p_i in zip(weights, p_n))
    sum_s = math.fsum(s)
    if sum_s == 0 and all(p_n) == 0:
        return 0
    return bp * math.exp(sum_s)