def __call__(self, requestsize=1, write=False):
    """Block the calling program if the throttle time has not expired.

    Parameter requestsize is the number of Pages to be read/written;
    multiply delay time by an appropriate factor.

    Because this seizes the throttle lock, it will prevent any other
    thread from writing to the same site until the wait expires.
    """
    self.lock.acquire()
    try:
        wait = self.waittime(write=write)
        # Calculate the multiplicity of the next delay based on how
        # big the request is that is being posted now.
        # We want to add "one delay" for each factor of two in the
        # size of the request. Getting 64 pages at once allows 6 times
        # the delay time for the server.
        self.next_multiplicity = math.log(1 + requestsize) / math.log(2.0)
        self.wait(wait)
        if write:
            self.last_write = time.time()
        else:
            self.last_read = time.time()
    finally:
        self.lock.release()
def get_nth_prime(n):
    """
    Get nth prime number

    >>> get_nth_prime(-1)
    Traceback (most recent call last):
      ...
    ValueError: n is -1 but expected to be a positive integer
    >>> get_nth_prime(4)
    7
    >>> get_nth_prime(10001)
    104743
    >>> get_nth_prime(50000)
    611953
    """
    if n < 1 or n != int(n):
        raise ValueError("n is {0} but expected to be a positive integer".format(n))
    if n == 1:
        return 2
    if n == 2:
        return 3
    if n == 3:
        return 5
    if n == 4:
        return 7
    if n == 5:
        return 11
    import math
    # Use a Rosser-type bound, p_n < n*ln(n)*(1 + ln(n)), as the sieve limit.
    upper = n * math.log(n)
    upper += upper * math.log(n)
    upper = math.ceil(upper)
    return get_primes(upper)[n - 1]
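# The helper get_primes is not shown above. A minimal sketch of one possible
# implementation (a plain sieve of Eratosthenes returning all primes <= limit);
# the name and signature are assumptions taken from the call site:
def get_primes(limit):
    limit = int(limit)
    sieve = [True] * (limit + 1)
    sieve[0:2] = [False, False]          # 0 and 1 are not prime
    for i in range(2, int(limit ** 0.5) + 1):
        if sieve[i]:
            # Cross out every multiple of i starting at i*i.
            sieve[i * i::i] = [False] * len(sieve[i * i::i])
    return [i for i, is_prime in enumerate(sieve) if is_prime]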
def entropy_root(lstOfLst):
    target_lst = []
    counts = {}
    rows = len(lstOfLst) - 1
    target_col = len(lstOfLst[0]) - 1
    n = 0
    entropy = 0
    # Collect the target (last) column of every example row.
    while n <= rows:
        target_lst.append(lstOfLst[n][target_col])
        n = n + 1
    target_lst1 = list(set(target_lst))
    n1 = len(target_lst1)
    n2 = len(target_lst)
    # Count how often each distinct target value occurs.
    while n1 > 0:
        k = n2
        cnt = 0
        while k > 0:
            if target_lst[k - 1] == target_lst1[n1 - 1]:
                cnt = cnt + 1
            k = k - 1
        counts.update({target_lst1[n1 - 1]: cnt})
        n1 = n1 - 1
    # Shannon entropy (base 2) of the target column.
    n1 = len(target_lst1)
    while n1 > 0:
        p = float(counts[target_lst1[n1 - 1]]) / float(rows + 1)
        entropy = entropy + (p * (math.log(p) / math.log(2)))
        n1 = n1 - 1
    return -1 * entropy
def _get_value_log(self, x, mu, v):
    """Natural-log density for count x with mean mu and dispersion v."""
    try:
        return (loggamma(x + v) - loggamma(x + 1) - loggamma(v)
                + v * log(v) - v * log(v + mu)
                + x * log(mu) - x * log(v + mu))
    except ValueError:
        # print('_get_value_log ValueError', x, mu, v, file=sys.stderr)
        return 1
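# The expression above appears to be the log-pmf of a negative binomial
# distribution parameterised by mean mu and dispersion v. A quick standalone
# sanity check against scipy (an assumption about the intended model, not
# part of the original code):
import math
from scipy.special import loggamma
from scipy.stats import nbinom

def _nb_logpmf(x, mu, v):
    # Same formula as _get_value_log, written as a free function.
    return (loggamma(x + v) - loggamma(x + 1) - loggamma(v)
            + v * math.log(v) - v * math.log(v + mu)
            + x * math.log(mu) - x * math.log(v + mu))

x, mu, v = 3, 2.0, 5.0
p = v / (v + mu)  # scipy's nbinom(n=v, p=v/(v+mu)) has mean mu
assert abs(_nb_logpmf(x, mu, v) - nbinom.logpmf(x, v, p)) < 1e-9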
def getFitness(self, tagList):
    tagList = list(tagList)
    # add start symbols and end symbols
    for i in range(self.N - 1):
        tagList.insert(0, '^')
        tagList.append('$')
    # initialize the variables
    answer = float(0.0)
    # calculate numerator & denominator
    length = len(tagList)
    for start in range(length - self.N + 1):
        tmp = []
        for index in range(self.N):
            tmp.append(tagList[start + index])
        gramTuple = tuple(tmp)
        # now gramTuple is the tuple for this NGRAM (self).
        gramTupleProb = self.getProb(gramTuple)
        answer += math.log(gramTupleProb)
        if start != 0:
            prefixGramTuple = self.getPrefixGram(gramTuple)
            prefixGramTupleProb = self.prefixNGRAM.getProb(prefixGramTuple)
            answer -= math.log(prefixGramTupleProb)
    return answer
def logadd(x, y):
    """A helper function for log addition."""
    from math import log, exp
    if x > y:
        return x + log(1. + exp(y - x))
    else:
        return y + log(1. + exp(x - y))
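# logadd(x, y) computes log(exp(x) + exp(y)) without overflow or underflow for
# extreme inputs (the usual log-sum-exp trick). A small illustration; the
# numbers below are only for demonstration:
from math import log, exp

a, b = 2.0, 3.0
assert abs(logadd(a, b) - log(exp(a) + exp(b))) < 1e-12

# For very negative log-probabilities the naive form underflows to log(0),
# while logadd stays finite:
print(logadd(-1000.0, -1001.0))   # ~ -999.6867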
def make_dictionary(file_array):
    cufflinks_dict = {}
    for i in range(0, len(file_array)):
        if file_array[i] != '':
            prelim_info_list = []
            each_gene_list = file_array[i].split("\t")
            try:
                # Preliminary info
                entry_name = each_gene_list[0]
                Gene_ID = each_gene_list[3]
                Gene_Name = each_gene_list[4]
                tss_id = each_gene_list[5]
                locus = each_gene_list[6]
                length = each_gene_list[7]
                coverage = each_gene_list[8]
                FPKM = each_gene_list[9]
                # log2(FPKM + 1)
                log2_FPKM = math.log(float(FPKM) + 1) / math.log(2)
                if entry_name not in cufflinks_dict:
                    cufflinks_dict[entry_name] = (entry_name + "\t" + Gene_ID + "\t" + Gene_Name + "\t"
                                                  + tss_id + "\t" + locus + "\t" + str("%.4f" % log2_FPKM))
                else:
                    pass
            except:
                pass
    return cufflinks_dict
def increase(self):
    value = math.log(self.read())
    value += .25
    if math.exp(value) >= self.maxvalue:
        value = math.log(self.maxvalue)
    print(value)
    self.write(int(math.exp(value)))
def TF_IDF(): print('Doing TF_IDF', file=sys.stderr) global TFIDF, docWeight, index if os.path.isfile('TFIDF.dat') and os.path.isfile('docWeight.dat') and os.path.isfile('index.dat'): f = open('TFIDF.dat', 'rb') TFIDF = pickle.load(f) f.close() f = open('docWeight.dat', 'rb') docWeight = pickle.load(f) f.close() f = open('index.dat', 'rb') index = pickle.load(f) f.close() else: print('.dat not exist, generating', file=sys.stderr) TFIDF = {} docCnt = len(docSize) avgSize = 0 index = [[] for i in range(docCnt)] for i in range(docCnt): avgSize += docSize[i] avgSize /= docCnt docWeight = [0 for i in range(docCnt)] para_b = 0.7 # tuning d = [(1 - para_b + para_b*docSize[i]/avgSize) for i in range(docCnt)] for i in invIndexUnigram: # word id IDF = math.log( docCnt / len(invIndexUnigram[i]) ) TFIDF[i] = {} for j in invIndexUnigram[i]: # doc id v = (invIndexUnigram[i][j] / d[j]) * IDF TFIDF[i][j] = v docWeight[j] += v * v index[j].append(i) for i in invIndexBigram: # word id IDF = math.log( docCnt / len(invIndexBigram[i]) ) TFIDF[i] = {} for j in invIndexBigram[i]: # doc id v = (invIndexBigram[i][j] / d[j]) * IDF TFIDF[i][j] = v docWeight[j] += v * v index[j].append(i) f = open('TFIDF.dat', 'wb') pickle.dump(TFIDF, f) f.close() f = open('docWeight.dat', 'wb') pickle.dump(docWeight, f) f.close() f = open('index.dat', 'wb') pickle.dump(index, f) f.close() printTime()
def optimize_hyperparameters(self, samples=5, step=3.0):
    old_hyper_parameters = [math.log(self._alpha_alpha), math.log(self._alpha_beta)]

    for ii in xrange(samples):
        log_likelihood_old = self.compute_likelihood(self._alpha_alpha, self._alpha_beta)
        log_likelihood_new = math.log(random.random()) + log_likelihood_old
        #print("OLD: %f\tNEW: %f at (%f, %f)" % (log_likelihood_old, log_likelihood_new, self._alpha_alpha, self._alpha_beta))

        l = [x - random.random() * step for x in old_hyper_parameters]
        r = [x + step for x in old_hyper_parameters]

        for jj in xrange(self._alpha_maximum_iteration):
            new_hyper_parameters = [l[x] + random.random() * (r[x] - l[x])
                                    for x in xrange(len(old_hyper_parameters))]
            trial_alpha, trial_beta = [math.exp(x) for x in new_hyper_parameters]
            lp_test = self.compute_likelihood(trial_alpha, trial_beta)

            if lp_test > log_likelihood_new:
                self._alpha_alpha = math.exp(new_hyper_parameters[0])
                self._alpha_beta = math.exp(new_hyper_parameters[1])
                #self._alpha_sum = self._alpha_alpha * self._K
                #self._beta_sum = self._alpha_beta * self._number_of_language_types
                old_hyper_parameters = [math.log(self._alpha_alpha), math.log(self._alpha_beta)]
                break
            else:
                for dd in xrange(len(new_hyper_parameters)):
                    if new_hyper_parameters[dd] < old_hyper_parameters[dd]:
                        l[dd] = new_hyper_parameters[dd]
                    else:
                        r[dd] = new_hyper_parameters[dd]
                    assert l[dd] <= old_hyper_parameters[dd]
                    assert r[dd] >= old_hyper_parameters[dd]

        print("\nNew hyperparameters (%i): %f %f" % (jj, self._alpha_alpha, self._alpha_beta))
def compute_disp_ntaps(dm, bw, freq):
    NTLIMIT = 65536 * 2
    #
    # Dt calculations are in MHz, rather than Hz
    # crazy astronomers....
    mbw = bw / 1.0e6
    mfreq = freq / 1.0e6

    f_lower = mfreq - (mbw / 2)
    f_upper = mfreq + (mbw / 2)

    # Compute smear time
    Dt = dm / 2.41e-4 * (1.0 / (f_lower * f_lower) - 1.0 / (f_upper * f_upper))

    # ntaps is now bandwidth*smeartime
    ntaps = bw * Dt
    if (ntaps < 32):
        ntaps = 32

    # special "flag" from command-line invoker to get around a bug
    # in Gnu Radio involving the FFT filter implementation
    #
    # we can *never* increase the size of an FFT filter at runtime
    # but can decrease it.  So there's a special "startup" flag (dm=1500.0)
    # that causes us to return the NTLIMIT number of taps
    #
    if (dm >= 1500.0):
        ntaps = NTLIMIT
    if (ntaps > NTLIMIT):
        ntaps = NTLIMIT

    # Round to the next power of two
    ntaps = int(math.log(ntaps) / math.log(2))
    ntaps = int(math.pow(2, ntaps + 1))
    return(int(ntaps))
def __call__(self, state, scope, pos, paramTypes, datum, classModel):
    ll = 0.0
    if isinstance(datum, list) or isinstance(datum, tuple):
        if len(datum) != len(classModel):
            raise PFARuntimeException("datum and classModel misaligned",
                                      self.errcodeBase + 0, self.name, pos)
        for i, x in enumerate(datum):
            mu = classModel[i]["mean"]
            vari = classModel[i]["variance"]
            if vari <= 0.0:
                raise PFARuntimeException("variance less than or equal to zero",
                                          self.errcodeBase + 1, self.name, pos)
            ll += -0.5 * math.log(2. * math.pi * vari)
            ll += -0.5 * ((x - mu)**2 / vari)
        return ll
    else:
        datumkeys = datum.keys()
        modelkeys = classModel.keys()
        if set(datumkeys) != set(modelkeys):
            raise PFARuntimeException("datum and classModel misaligned",
                                      self.errcodeBase + 0, self.name, pos)
        for feature in datumkeys:
            x = datum[feature]
            mu = classModel[feature]["mean"]
            vari = classModel[feature]["variance"]
            if vari <= 0.0:
                raise PFARuntimeException("variance less than or equal to zero",
                                          self.errcodeBase + 1, self.name, pos)
            ll += -0.5 * math.log(2. * math.pi * vari)
            ll += -0.5 * ((x - mu)**2 / vari)
        return ll
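# Each term accumulated above is the log-density of an independent Gaussian,
# so the total is a naive-Bayes log-likelihood of the feature vector. A
# minimal standalone check of that identity (names and data here are
# illustrative only, not from the original library):
import math
from scipy.stats import norm

def gaussian_ll(datum, class_model):
    ll = 0.0
    for x, params in zip(datum, class_model):
        mu, vari = params["mean"], params["variance"]
        ll += -0.5 * math.log(2.0 * math.pi * vari) - 0.5 * (x - mu) ** 2 / vari
    return ll

datum = [1.0, -0.5]
model = [{"mean": 0.0, "variance": 1.0}, {"mean": -1.0, "variance": 4.0}]
expected = sum(norm.logpdf(x, m["mean"], math.sqrt(m["variance"]))
               for x, m in zip(datum, model))
assert abs(gaussian_ll(datum, model) - expected) < 1e-12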
def mdl(g):
    """The Minimum Description Length calculator for Bayesian network g."""
    n = len(g.V)       # the variable count
    N = len(g.data)    # the sample number
    logn = math.log(n, 2)  # value of log(n)
    logN = math.log(N, 2)  # value of log(N)

    complexity = sum([logn * len(g.getParentOf(v))
                      + logN / 2 * product(g.getParentOf(v).cards()) * (v.card - 1)
                      for v in g.V])

    logll = 0  # log likelihood
    for v in g.V:
        for parentVals in g.getParentOf(v).allAssignments():
            for val in v.values:
                # assignment of the parent
                parentAssignments = dict(zip(map(lambda p: p.var, g.getParentOf(v)), parentVals))
                assignments = parentAssignments.copy()
                # including the child value in the assignment
                assignments[v.var] = val
                # the empirical count of the given assignments of parent
                parentN = g.N(**parentAssignments)
                # the empirical count of the given assignments of parent and child
                childN = g.N(**assignments)
                if childN != 0:
                    logll += (childN * math.log(childN / parentN, 2))
                else:
                    pass  # nothing happens

    return -logll + complexity
def command_line(veb, ra, ov, pr):
    l = len(sys.argv)
    for i in xrange(1, l):
        if not is_switch(sys.argv[i]):
            break
    for j in xrange(i, l):  # Start with the first non-switch
        if j != i:  # Pretty printing
            print
        response = sys.argv[j]
        if valid_input(response):
            response = response.replace('^', '**')
            try:
                n = eval(response)
                int(n)
            except (SyntaxError, TypeError, ValueError):
                help()
        else:
            help()
        print 'Factoring %d:' % n
        if n < 0:
            print -1
            n = -n
        if n == 0:
            print '0 does not have a well-defined factorization.'
            continue
        elif n == 1:
            print 1
            continue
        if ov == DUMMY:
            ov = 2*math.log(math.log(n))
        for factor in factors(n, veb, ra, ov, pr):
            print factor
def could_be_prime(n):
    '''Performs some trials to compute whether n could be prime.
    Run time is O(N^3 / (log N)^2) for N bits.

    Returns whether it is possible for n to be prime (True or False).
    '''
    if n < 2:
        return False
    if n == 2:
        return True
    if not n & 1:
        return False

    product = ONE
    log_n = int(math.log(n)) + 1
    bound = int(math.log(n) / (LOG_2 * math.log(math.log(n))**2)) + 1
    if bound * log_n >= n:
        bound = 1
        log_n = int(sqrt(n))
    prime_bound = 0
    prime = 3

    for _ in xrange(bound):
        p = []
        prime_bound += log_n
        while prime <= prime_bound:
            p.append(prime)
            prime = next_prime(prime)
        if p != []:
            p = prod(p)
            product = (product * p) % n

    return gcd(n, product) == 1
def compute_accuracy(self, x_original, y_original, t0, t1, h, base=math.e): p = plotter() # The estimated function is N1 [x, y] = p.hist2fun(history=h) x_temp = x[0:1] + [x[2*i+1] for i in range(len(x)/2)] x = x_temp y_temp = y[0:1] + [y[2*i+1] for i in range(len(y)/2)] y = y_temp # Now we merge the two vectors x_original and x, then we add t0 and t1 x_temp = x_original + x x_temp = set(x_temp) # removing duplicates x_temp = list(x_temp) x_temp.sort() #print x_temp inf = np.array(x_temp)>t0 sup = np.array(x_temp)<t1 inf_sup = inf*sup x_temp = set(inf_sup * np.array(x_temp)) x_temp = list(x_temp) x_temp[0] = t0 x_temp.append(t1) x_vect = x_temp # Now we have the vector. We can compute the formula sum_temp = 0 for i in range(len(x_vect)-1): N0_i = float(self.evaluate_func(x_original, y_original, x_vect[i])) N1_i = float(self.evaluate_func(x, y, x_vect[i])) sum_temp += (abs(N0_i-N1_i)/(N0_i+N1_i))*(math.log(x_vect[i+1], base)-math.log(x_vect[i], base)) result = float(sum_temp)/(math.log(t1, base)-math.log(t0, base)) return result
def relate(size, base):
    if size == 0:
        return base
    size = float(size)
    base = float(base)

    if abs(size - base) < 0.1:
        return 0

    sign = -1 if size < base else 1
    endp = 0 if size < base else 36
    diff = (abs(base - size) * 3) + ((36 - size) / 100)
    logb = abs(base - endp)
    if logb == 1.0:
        logb = 1.1

    try:
        result = sign * math.log(diff, logb)
    except ValueError:
        if diff < 0:
            # Size is both very large and close to base
            return 0
        if logb == 0:
            logb = 1e-6
        if diff == 0:
            diff = 1e-6
        result = sign * math.log(diff, logb)
    return result
def get_decision_given_context(theta, type, decision, context):
    global cache_normalizing_decision, feature_index, source_to_target_firing, model1_probs, ets
    m1_event_prob = model1_probs.get((decision, context), 0.0)
    fired_features = get_wa_features_fired(type=type, decision=decision, context=context,
                                           dictionary_features=dictionary_features, ishybrid=True)
    theta_dot_features = sum([theta[feature_index[f]] * f_wt for f_wt, f in fired_features])
    numerator = m1_event_prob * exp(theta_dot_features)

    if (type, context) in cache_normalizing_decision:
        denom = cache_normalizing_decision[type, context]
    else:
        denom = ets[context]
        target_firings = source_to_target_firing.get(context, set([]))
        for tf in target_firings:
            m1_tf_event_prob = model1_probs.get((tf, context), 0.0)
            tf_fired_features = get_wa_features_fired(type=type, decision=tf, context=context,
                                                      dictionary_features=dictionary_features, ishybrid=True)
            tf_theta_dot_features = sum([theta[feature_index[f]] * f_wt for f_wt, f in tf_fired_features])
            denom += m1_tf_event_prob * exp(tf_theta_dot_features)
        cache_normalizing_decision[type, context] = denom

    try:
        log_prob = log(numerator) - log(denom)
    except ValueError:
        print numerator, denom, decision, context, m1_event_prob, theta_dot_features
        raise BaseException
    return log_prob
def compress(temp, press=70., sali=0):
    '''Compute water or brine compressibility from temperature, pressure and
    salinity, according to Spivey et al (2004).

    temp: temperature in degrees Celsius.
    press: pressure in MPa.
    sali: concentration of NaCl in ppm.'''
    mols = mol(sali)

    # If salinity == 0, the fluid is water, so we compute the water
    # compressibility
    if sali == 0:
        compress = (1. / coef(EwT, temp)) * log(abs(coef(EwT, temp) * (press / 70.)
                                                    + coef(FwT, temp)))
    else:
        Fb = Fw
        for j in range(len(Fcm)):
            Fb += coef(Fcm[j], temp) * mols ** (j / 2. + 0.5)
        Eb = Ew + coef(Ecm, temp) * mols
        compress = (1. / Eb) * log(abs(Eb * (press / 70.) + Fb))
    return compress
def lp(cs, C, ls, ps):
    """
    Args:
        cs: a list containing the cost of probing `X_1, ..., X_n`
        C: the cost budget
        ls: a list of the lengths of the intervals `I_1, ..., I_m`. Each
            element of the list contains the length of the corresponding
            interval.
        ps: a list of functions, each of which takes in one argument `j` and
            returns `Pr[X_i >= a_j]`

    Returns:
        a triple of type `(pulp.LpVariable, list of pulp.LpVariable,
        pulp.LpProblem)` with values of `(z, list of y_i, unsolved linear
        program)`.
    """
    assert len(ps) == len(cs)
    n = len(ps)
    m = len(ls)

    problem = pulp.LpProblem('Step 1', pulp.LpMinimize)
    z = pulp.LpVariable('z', cat='Integer')
    ys = [pulp.LpVariable('y' + str(i), lowBound=0, upBound=1, cat='Integer')
          for i in xrange(n)]

    problem += z
    for j in xrange(1, m + 1):
        aa = (math.log(1.0 / p(j)) for p in ps)
        problem += pulp.lpDot(ys, aa) <= math.log(ls[j - 1]) - z, 'j=' + str(j)
    problem += pulp.lpDot(cs, ys) <= C, 'cost'

    return z, ys, problem
def energy(self):
    sum = 0.0
    sum -= di.norm.logpdf(self.data, loc=self.mu, scale=self.sigma).sum()
    # Now add in the priors...
    sum -= log(self.sigma)*(-0.5) - self.nu/2 * (self.mu-self.priormu)**2/self.sigma
    sum -= log(self.sigma)*(self.kappa+2)/(-2) - 0.5*self.priorsigma/self.sigma
    return sum
def estimDiv(c, psmc, r, t):
    """Estimate divergence using eq 12."""
    N0 = 0
    if psmc:
        if not r:
            # parse psmc
            f = open(psmc, 'r')
            line = f.readline().split("-eN ")
            t = [float(i.split()[0]) for i in line[1:]]
            t.insert(0, 0.0)
            r = [float(i.split()[1]) for i in line[1:]]
            N0 = float(line[0].split()[1]) / float(line[0].split()[4])
            r.insert(0, 1.0)
        i = 0
        nc = 1.0
        while (1 - nc * exp(-(t[i+1] - t[i]) / r[i])) < c:
            nc *= exp(-(t[i+1] - t[i]) / r[i])
            i += 1
            #print("i:{}, t[i]:{}, t[i+1]:{}, r[i]:{}, nc:{}".format(i, t[i], t[i+1], r[i], nc))
        j = i
        print("nc = {}, 1-nc = {}".format(nc, 1 - nc))
        T_hat = -r[j] * log((1 - c) / nc) + t[j]
    else:
        T_hat = -log(1 - c)  # assumes constant popsize
    return(r, t, N0, T_hat)
def __init__(self, ref_file, max_n=100, verbose=False):
    '''
    Read the reference file and store wordcounts as class variables:
    - a dictionary mapping words to their log probabilities
    - a dictionary mapping character patterns (e.g. 'abccda' for 'dotted')
      to a list of words and their log probabilities, sorted by probability
    '''
    self.max_n = max_n
    self.verbose = verbose
    if self.verbose:
        print 'processing reference file...'

    # Get words and word probabilities from text and put in dictionary
    self.vectorizer = CountVectorizer(token_pattern=r'(?u)\b[a-zA-Z]+\b')
    wordcounts = self.__get_wordcounts(ref_file)
    self.word_dict = {word: math.log(count + 1.0) for count, word in wordcounts}

    # Also put words and probabilities into the dictionary keyed by pattern
    self.words_by_pattern = {}
    for count, word in wordcounts:
        pattern = self.__word_to_pattern(word)
        prob = math.log(count + 1.0)
        if pattern in self.words_by_pattern:
            self.words_by_pattern[pattern].append((prob, word))
        else:
            self.words_by_pattern[pattern] = [(prob, word)]

    # Initial null solution
    self.solution = None
    if self.verbose:
        print '...done\n'
def make_non_differential_constellation(m, gray_coded):
    side = int(pow(m, 0.5))
    if (not isinstance(m, int) or m < 4 or not is_power_of_four(m)):
        raise ValueError("m must be a power of 4 integer.")
    # Each symbol holds k bits.
    k = int(log(m) / log(2.0))
    if gray_coded:
        # Number rows and columns using gray codes.
        gcs = gray_code(side)
        # Get inverse gray codes.
        i_gcs = mod_codes.invert_code(gcs)
    else:
        i_gcs = range(0, side)
    # The distance between points is found.
    step = 2.0 / (side - 1)

    gc_to_x = [-1 + i_gcs[gc] * step for gc in range(0, side)]

    # First k/2 bits determine x position.
    # Following k/2 bits determine y position.
    const_map = []
    for i in range(m):
        y = gc_to_x[get_bits(i, 0, k / 2)]
        x = gc_to_x[get_bits(i, k / 2, k / 2)]
        const_map.append(complex(x, y))

    return const_map
def predict_class(prediction, prob_other, class_doc_stats, class_prob, word_list, word_dict):
    prob_values = []
    new_prob_values = []
    for class_name in class_prob:
        prob_values.append((class_name, class_prob[class_name]))

    # Read the stop-word list.
    inpfile = open("stopWords.txt", "r")
    line = inpfile.readline()
    stopWords = []
    while line:
        stopWord = line.strip()
        stopWords.append(stopWord)
        line = inpfile.readline()
    inpfile.close()

    # Accumulate log2 P(class) + sum of log2 P(word | class) for every class.
    for val in prob_values:
        prob = math.log(val[1], 2)
        class_name = val[0]
        for word in word_list:
            word = word.lower()
            # val = re.search(r"^[a-zA-Z][a-zA-Z0-9]*[a-zA-Z]+[a-zA-Z0-9]*$", word)
            # if (word in stopWords):
            #     continue
            if word in word_dict:
                prob = prob + math.log(Decimal(word_dict[word][class_name]), 2)
            else:
                prob = prob + math.log(Decimal(prob_other[class_name]), 2)
        new_prob_values.append((class_name, prob))

    prob_values = new_prob_values
    prob_values.sort(key=lambda tup: tup[1], reverse=True)
    return prob_values, prob_values[0][0]
def __call__(self, x, pos=None): "Return the format for tick val x at position pos" vmin, vmax = self.axis.get_view_interval() vmin, vmax = mtransforms.nonsingular(vmin, vmax, expander=0.05) d = abs(vmax - vmin) b = self._base if x == 0: return "0" sign = np.sign(x) # only label the decades fx = math.log(abs(x)) / math.log(b) isDecade = self.is_decade(fx) if not isDecade and self.labelOnlyBase: s = "" # if 0: pass elif fx > 10000: s = "%1.0e" % fx # elif x<1: s = '$10^{%d}$'%fx # elif x<1: s = '10^%d'%fx elif fx < 1: s = "%1.0e" % fx else: s = self.pprint_val(fx, d) if sign == -1: s = "-%s" % s return self.fix_minus(s)
def __call__(self, x, pos=None): "Return the format for tick val x at position pos" b = self._base # only label the decades if x == 0: return "$0$" sign = np.sign(x) fx = math.log(abs(x)) / math.log(b) isDecade = self.is_decade(fx) usetex = rcParams["text.usetex"] if sign == -1: sign_string = "-" else: sign_string = "" if not isDecade and self.labelOnlyBase: s = "" elif not isDecade: if usetex: s = r"$%s%d^{%.2f}$" % (sign_string, b, fx) else: s = "$\mathdefault{%s%d^{%.2f}}$" % (sign_string, b, fx) else: if usetex: s = r"$%s%d^{%d}$" % (sign_string, b, self.nearest_long(fx)) else: s = r"$\mathdefault{%s%d^{%d}}$" % (sign_string, b, self.nearest_long(fx)) return s
def __next__(self):
    rv = self.value

    #------------------------------------------------------------------------
    # need to round or we might succumb to the dreaded python rounding
    # error (eg 0.99999 < 0 when multiplying 1/24.0 by 24)
    #------------------------------------------------------------------------
    if round(self.pos, 8) >= round(self.length_cur, 8):
        self.value = 1.0
        rv = 1.0
        self.pos = 0
        self.length_cur = Pattern.value(self.length)
        amp_cur = Pattern.value(self.amp)
        rate_start = 1.0
        rate_end = 1.0 + amp_cur
        steps = TICKS_PER_BEAT * self.length_cur
        self.dv = math.exp(math.log(rate_end / rate_start) / steps)

    self.pos += 1.0 / TICKS_PER_BEAT
    self.value = self.value * self.dv

    #------------------------------------------------------------------------
    # subtract
    #------------------------------------------------------------------------
    rv = math.log(rv, 2)
    print("warp: %f" % rv)

    return rv
def deviation_score(percentage, lower_bound, upper_bound):
    if percentage < lower_bound:
        return math.log(lower_bound - percentage, lower_bound) * 100
    elif percentage > upper_bound:
        return math.log(percentage - upper_bound, 100 - upper_bound) * 100
    else:
        return 0
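# Quick illustration of deviation_score: values inside [lower, upper] score 0,
# and the score grows logarithmically with the distance outside the band
# (the inputs below are made up for demonstration):
import math

print(deviation_score(50, 40, 60))   # 0 -- inside the band
print(deviation_score(20, 40, 60))   # log_40(20) * 100, ~81.2
print(deviation_score(90, 40, 60))   # log_40(30) * 100, ~92.2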
def solve(Xs_info, C, aa, f_cost, epsilon): """ Args: Xs_info: map from distribution to tuple of type (probe cost, p_ij) C: the cost budget aa: the values taken on by the distribution epsilon: the epsilon """ # Compute interval lengths. Note that I_i = [a_j, a_{j + 1}]. assert len(set(aa)) == len(aa), \ 'The values taken on by the distribution must be distinct' assert all(itertools.imap(lambda a: a >= 0, aa)), \ 'The values taken on by the distribution must be nonnegative' ls = [t - s for s, t in zip(aa, aa[1:])] cs, ps = zip(*Xs_info.itervalues()) z, ys, problem = lp(cs, C, ls, ps) print problem problem.solve() print 'z = {}, ys = {}'.format(z.value(), [y.value() for y in ys]) # The subset S corresponding to i s.t. y_i = 1 is feasible s0 = [x for x, y in zip(Xs_info.iterkeys(), ys) if y.value() == 1] # new cost budget C(log log m + log 1/e) m = len(aa) C_relaxed = C * (math.log(math.log(m)) + math.log(1.0 / epsilon)) X_cost = {k: c for k, (c, _) in Xs_info.iteritems()} return minimum_element.minimum_element(X_cost, C_relaxed, f_cost, S_0=s0)
if tf_doc[item] > 0: n[item] = n[item] + 1 TF.append(tf_doc) TFN = [] for TFj in TF: #for each term TFij TFj = [TFij / float(max(TFj)) for TFij in TFj] TFN.append(TFj) N = len(corpus) IDF = [] for termi in range(len(terms)): IDF.append(math.log(No_of_Documents / float(n[termi]), 2)) TFNIDF = [] for TFNj in TFN: TFNjIDF = [] for termIDX in range(len(terms)): TFNjIDF.append(TFNj[termIDX] * IDF[termIDX]) TFNIDF.append(TFNjIDF) TFNIDF_Matrix = np.matrix(TFNIDF) # Column Max C_Max = [TFNIDF_Matrix.max(axis=0).item(i) for i in range(0, len(terms))] #Column Min C_Min = [TFNIDF_Matrix.min(axis=0).item(i) for i in range(0, len(terms))]
def expran(h):
    """
    Return a random variate drawn from the exponential distribution with
    hazard h and mean 1/h.
    """
    return (-log(1.0 - random()) / h)
def bnldev(n, pp): """ Return a random deviate drawn from the Binomial distribution with parameters n (a positive integer) and pp (a probability). """ # This code was translated from Numerical Recipes in C global nold, pold, pc, plog, pclog, en, oldg if pp <= 0.5: p = pp else: p = 1.0 - pp am = n * p if n < 25: # Direct method bnl = 0.0 for j in xrange(n): if random() < p: bnl += 1.0 elif am < 1.0: # Poisson method g = exp(-am) t = 1.0 for j in xrange(n): t *= random() if t < g: break bnl = j else: # rejection method if n != nold: en = n oldg = gammln(en + 1.0) nold = n if p != pold: pc = 1.0 - p plog = log(p) pclog = log(pc) pold = p sq = sqrt(1.0 * am * pc) while True: while True: angle = pi * random() y = tan(angle) em = sq * y + am if em >= 0 and em < (en + 1.0): break em = floor(em) t = 1.2*sq*(1.0+y*y)*exp(oldg-gammln(em+1.0) \ - gammln(en-em+1.0) \ + em*plog + (en-em)*pclog) if random() < t: break bnl = em if p != pp: bnl = n - bnl return int(round(bnl))
def get_multiplicity(nmin=2, FDR=0.05): p_star_dict = {} G_score_list = [] gene_by_pop_dict = {} for strain in strains: sites_to_remove = get_sites_to_remove(strain) gene_count_dict = {} if strain == 'minimal': dirs = [ 'syn3B_minimal/mm13', 'syn3B_minimal/mm11', 'syn3B_minimal/mm10', 'syn3B_minimal/mm9' ] ref_path = mt.get_path( ) + '/data/syn3B_minimal/reference/Synthetic.bacterium_JCVI-Syn3A.gb' elif strain == 'wildtype': dirs = [ 'syn1.0_wildtype/mm6', 'syn1.0_wildtype/mm4', 'syn1.0_wildtype/mm3', 'syn1.0_wildtype/mm1' ] ref_path = mt.get_path( ) + '/data/syn1.0_wildtype/reference/Synthetic.Mycoplasma.mycoides.JCVI-syn1.0_CP002027.1.gb' effective_gene_lengths, effective_gene_lengths_syn, Lsyn, Lnon, substitution_specific_synonymous_fraction = mt.calculate_synonymous_nonsynonymous_target_sizes( ref_path) for dir in dirs: for i, line in enumerate( open(mt.get_path() + '/data/' + dir + '/annotated.gd', 'r')): line_split = line.strip().split('\t') if line_split[0] not in output_to_keep: continue if line_split[3] + '_' + line_split[4] in sites_to_remove: continue frequency = float([s for s in line_split if 'frequency=' in s][0].split('=')[1]) if frequency != 1: continue if line_split[0] == 'SNP': if [s for s in line_split if 'snp_type=' in s ][0].split('=')[1] == 'nonsynonymous': locus_tag = [ s for s in line_split if 'locus_tag=' in s ][0].split('=')[1] frequency = float([ s for s in line_split if 'frequency=' in s ][0].split('=')[1]) if ';' in locus_tag: for locus_tag_j in locus_tag.split(';'): if locus_tag_j not in gene_count_dict: gene_count_dict[locus_tag_j] = 0 gene_count_dict[locus_tag_j] += 1 else: if locus_tag not in gene_count_dict: gene_count_dict[locus_tag] = 0 gene_count_dict[locus_tag] += 1 else: continue else: if len( [s for s in line_split if 'gene_position=coding' in s ]) >= 1: locus_tag = [ s for s in line_split if 'locus_tag=' in s ][0].split('=')[1] frequency = float([ s for s in line_split if 'frequency=' in s ][0].split('=')[1]) if ';' in locus_tag: for locus_tag_j in locus_tag.split(';'): if locus_tag_j not in gene_count_dict: gene_count_dict[locus_tag_j] = 0 gene_count_dict[locus_tag_j] += 1 else: if locus_tag not in gene_count_dict: gene_count_dict[locus_tag] = 0 gene_count_dict[locus_tag] += 1 # get multiplicity scores gene_parallelism_statistics = {} for gene_i, length_i in effective_gene_lengths.items(): gene_parallelism_statistics[gene_i] = {} gene_parallelism_statistics[gene_i]['length'] = length_i gene_parallelism_statistics[gene_i]['observed'] = 0 gene_parallelism_statistics[gene_i]['multiplicity'] = 0 # save number of mutations for multiplicity for locus_tag_i, n_muts_i in gene_count_dict.items(): gene_parallelism_statistics[locus_tag_i]['observed'] = n_muts_i L_mean = np.mean(list(effective_gene_lengths.values())) L_tot = sum(list(effective_gene_lengths.values())) n_tot = sum(gene_count_dict.values()) # don't include taxa with less than 20 mutations print("N_total = " + str(n_tot)) # go back over and calculate multiplicity for locus_tag_i in gene_parallelism_statistics.keys(): # double check the measurements from this gene_parallelism_statistics[locus_tag_i][ 'multiplicity'] = gene_parallelism_statistics[locus_tag_i][ 'observed'] * 1.0 / effective_gene_lengths[ locus_tag_i] * L_mean gene_parallelism_statistics[locus_tag_i][ 'expected'] = n_tot * gene_parallelism_statistics[locus_tag_i][ 'length'] / L_tot pooled_multiplicities = np.array([ gene_parallelism_statistics[gene_name]['multiplicity'] for gene_name in gene_parallelism_statistics.keys() if 
gene_parallelism_statistics[gene_name]['multiplicity'] >= 1 ]) pooled_multiplicities.sort() pooled_tupe_multiplicities = np.array([ (gene_parallelism_statistics[gene_name]['multiplicity'], gene_parallelism_statistics[gene_name]['observed']) for gene_name in gene_parallelism_statistics.keys() if gene_parallelism_statistics[gene_name]['multiplicity'] >= 1 ]) pooled_tupe_multiplicities = sorted(pooled_tupe_multiplicities, key=lambda x: x[0]) pooled_tupe_multiplicities_x = [ i[0] for i in pooled_tupe_multiplicities ] pooled_tupe_multiplicities_y = [ i[1] for i in pooled_tupe_multiplicities ] pooled_tupe_multiplicities_y = [ sum(pooled_tupe_multiplicities_y[i:]) / sum(pooled_tupe_multiplicities_y) for i in range(len(pooled_tupe_multiplicities_y)) ] null_multiplicity_survival = mt.NullGeneMultiplicitySurvivalFunction.from_parallelism_statistics( gene_parallelism_statistics) null_multiplicity_survival_copy = null_multiplicity_survival( pooled_multiplicities) null_multiplicity_survival_copy = [ sum(null_multiplicity_survival_copy[i:]) / sum(null_multiplicity_survival_copy) for i in range(len(null_multiplicity_survival_copy)) ] #threshold_idx = numpy.nonzero((null_multiplicity_survival(observed_ms)*1.0/observed_multiplicity_survival)<FDR)[0][0] mult_survival_dict = { 'Mult': pooled_multiplicities, 'Obs_fract': pooled_tupe_multiplicities_y, 'Null_fract': null_multiplicity_survival_copy } mult_survival_df = pd.DataFrame(mult_survival_dict) mult_survival_df_out = mt.get_path( ) + '/data/mult_survival_curves_' + strain + '.txt' mult_survival_df.to_csv(mult_survival_df_out, sep='\t', index=True) # get likelihood score and null test observed_G, pvalue = mt.calculate_total_parallelism( gene_parallelism_statistics) G_score_list.append((strain, observed_G, pvalue)) print(strain, observed_G, pvalue) # Give each gene a p-value, get distribution gene_logpvalues = mt.calculate_parallelism_logpvalues( gene_parallelism_statistics) pooled_pvalues = [] for gene_name in gene_logpvalues.keys(): if (gene_parallelism_statistics[gene_name]['observed'] >= nmin) and (float(gene_logpvalues[gene_name]) >= 0): pooled_pvalues.append(gene_logpvalues[gene_name]) pooled_pvalues = np.array(pooled_pvalues) pooled_pvalues.sort() if len(pooled_pvalues) == 0: continue null_pvalue_survival = mt.NullGeneLogpSurvivalFunction.from_parallelism_statistics( gene_parallelism_statistics, nmin=nmin) observed_ps, observed_pvalue_survival = mt.calculate_unnormalized_survival_from_vector( pooled_pvalues, min_x=-4) # Pvalue version # remove negative minus log p values. 
neg_p_idx = np.where(observed_ps >= 0) observed_ps_copy = observed_ps[neg_p_idx] observed_pvalue_survival_copy = observed_pvalue_survival[neg_p_idx] pvalue_pass_threshold = np.nonzero( null_pvalue_survival(observed_ps_copy) * 1.0 / observed_pvalue_survival_copy < FDR)[0] if len(pvalue_pass_threshold) == 0: continue threshold_idx = pvalue_pass_threshold[0] pstar = observed_ps_copy[ threshold_idx] # lowest value where this is true num_significant = observed_pvalue_survival[threshold_idx] # make it log base 10 logpvalues_dict = { 'P_value': observed_ps / math.log(10), 'Obs_num': observed_pvalue_survival, 'Null_num': null_pvalue_survival(observed_ps) } logpvalues_df = pd.DataFrame(logpvalues_dict) logpvalues_df_out = mt.get_path( ) + '/data/logpvalues_' + strain + '.txt' logpvalues_df.to_csv(logpvalues_df_out, sep='\t', index=True) p_star_dict[strain] = (num_significant, pstar / math.log(10)) output_mult_gene_filename = mt.get_path( ) + '/data/mult_genes_sig_' + strain + '.txt' output_mult_gene = open(output_mult_gene_filename, "w") output_mult_gene.write(",".join([ "Gene", "Length", "Observed", "Expected", "Multiplicity", "-log10(P)" ])) for gene_name in sorted( gene_parallelism_statistics, key=lambda x: gene_parallelism_statistics.get(x)['observed'], reverse=True): if gene_logpvalues[ gene_name] >= pstar and gene_parallelism_statistics[ gene_name]['observed'] >= nmin: output_mult_gene.write("\n") # log base 10 transform the p-values here as well output_mult_gene.write( "%s, %0.1f, %d, %0.2f, %0.2f, %g" % (gene_name, gene_parallelism_statistics[gene_name]['length'], gene_parallelism_statistics[gene_name]['observed'], gene_parallelism_statistics[gene_name]['expected'], gene_parallelism_statistics[gene_name]['multiplicity'], abs(gene_logpvalues[gene_name]) / math.log(10))) output_mult_gene.close() total_parallelism_path = mt.get_path() + '/data/total_parallelism.txt' total_parallelism = open(total_parallelism_path, "w") total_parallelism.write("\t".join(["Strain", "G_score", "p_value"])) for i in range(len(G_score_list)): taxon_i = G_score_list[i][0] G_score_i = G_score_list[i][1] p_value_i = G_score_list[i][2] total_parallelism.write("\n") total_parallelism.write("\t".join( [taxon_i, str(G_score_i), str(p_value_i)])) total_parallelism.close() with open(mt.get_path() + '/data/p_star.txt', 'wb') as file: file.write( pickle.dumps(p_star_dict)) # use `pickle.loads` to do the reverse
def calc(x): return str(math.log(abs(12*math.sin(int(x)))))
def log_uniform(lo, hi, rate):
    log_lo = math.log(lo)
    log_hi = math.log(hi)
    v = log_lo * (1 - rate) + log_hi * rate
    return math.exp(v)
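# log_uniform interpolates between lo and hi on a log scale, which is the
# usual way to sample hyperparameters such as learning rates: with a uniform
# random rate in [0, 1], the result is log-uniformly distributed on [lo, hi].
# A small demonstration; the bounds below are arbitrary:
import math
import random

lr = log_uniform(1e-5, 1e-2, random.random())
assert 1e-5 <= lr <= 1e-2
print(log_uniform(1e-5, 1e-2, 0.5))   # geometric midpoint, ~3.16e-4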
def log_cluster_assign_score(self, cluster_id):
    current_cluster_size = self.suffstats[cluster_id].num_pts
    return log(current_cluster_size + float(self.alpha) / self.num_clusters)
# http://infohost.nmt.edu/~es421/pascal/list12.pas
# PROGRAM Tlog
# test ln and exp
# From Borland Pascal Programs for Scientists and Engineers
# by Alan R. Miller, Copyright C 1993, SYBEX Inc
import math

X = 1.0E-4 / 0.3
for I in range(1, 11):
    Y = math.log(X)
    print ' X =', X, ', Exp(Ln) =', math.exp(Y)
    X = 0.5 * X
error = 0
for i in zip(case2, res_f):
    ea_err = i[0] - i[1]
    error += copysign(1, ea_err) * (ea_err**2)
error = error / sum(case2)

if error < 1e5 and error > -1e5:
    print('case2')
elif error > 0:
    print('case1')
else:
    print('case3')


# Slide
master_brute_force(lambda n: n, 4, 2)
master_brute_force(lambda n: n * log(n, 10), 2, 2)
master_brute_force(lambda n: n * log(n, 10), 1, 3)
master_brute_force(lambda n: n**2, 8, 2)
master_brute_force(lambda n: n**3, 9, 3)
master_brute_force(lambda n: 1, 1, 2)
master_brute_force(lambda n: log(n, 10), 2, 2)
print()

# HW
master_brute_force(lambda n: n**2, 5, 2)
master_brute_force(lambda n: n**1.5, 5, 2)
master_brute_force(lambda n: (n**2) * log(n, 2), 10, 10, base=2)
print()

# Exam
master_brute_force(lambda n: n**2 * log(n, 10), 4, 2)
master_brute_force(lambda n: 1, 3, 2)
def hurstExponent(x, d=50): # Find such a natural number OptN that possesses the largest number of # divisors among all natural numbers in the interval [0.99*N,N] dmin, N, N0 = d, x.shape[0], math.floor(0.99 * x.shape[0]) dv = np.zeros((N - N0 + 1, )) for i in range(N0, N + 1): dv[i - N0] = divGtN0(i, dmin).shape[0] optN = N0 + np.max(np.arange(0, N - N0 + 1)[max(dv) == dv]) # Use the first OptN values of x for further analysis x = x[:optN] d = divGtN0(optN, dmin) N = d.shape[0] RSe, ERS = np.zeros((N, )), np.zeros((N, )) # Calculate empirical R/S for i in range(N): RSe[i] = rscalc(x, d[i]) # Compute Anis-Lloyd [1] and Peters [3] corrected theoretical E(R/S) # (see [4] for details) for i in range(N): n = d[i] K = np.arange(1, n) ratio = (n - 0.5) / n * np.sum(np.sqrt((np.ones((n - 1)) * n - K) / K)) if n > 340: ERS[i] = ratio / math.sqrt(0.5 * math.pi * n) else: ERS[i] = (math.gamma(0.5 * (n - 1)) * ratio) / (math.gamma(0.5 * n) * math.sqrt(math.pi)) # Calculate the Anis-Lloyd/Peters corrected Hurst exponent # Compute the Hurst exponent as the slope on a loglog scale ERSal = np.sqrt(0.5 * math.pi * d) Pal = np.polyfit(np.log10(d), np.log10(RSe - ERS + ERSal), 1) Hal = Pal[0] # Calculate the empirical and theoretical Hurst exponents Pe = np.polyfit(np.log10(d), np.log10(RSe), 1) He = Pe[0] P = np.polyfit(np.log10(d), np.log10(ERS), 1) Ht = P[0] # Compute empirical confidence intervals (see [4]) L = math.log2(optN) # R/S-AL (min(divisor)>50) two-sided empirical confidence intervals #pval95 = np.array([0.5-exp(-7.33*log(log(L))+4.21) exp(-7.20*log(log(L))+4.04)+0.5]) lnlnL = math.log(math.log(L)) c1 = [ 0.5 - math.exp(-7.35 * lnlnL + 4.06), math.exp(-7.07 * lnlnL + 3.75) + 0.5, 0.90 ] c2 = [ 0.5 - math.exp(-7.33 * lnlnL + 4.21), math.exp(-7.20 * lnlnL + 4.04) + 0.5, 0.95 ] c3 = [ 0.5 - math.exp(-7.19 * lnlnL + 4.34), math.exp(-7.51 * lnlnL + 4.58) + 0.5, 0.99 ] C = np.array([c1, c2, c3]) detail = (d, optN, RSe, ERS, ERSal) return (Hal, He, Ht, C, detail)
xg_reg.fit(x2,y2) final_portfolio=pd.DataFrame(columns=['ID','ln_LR']) error=[] for x in range(1,601): try: df_test=pd.read_csv('test_portfolio_cleaned17nov_'+str(x)+'.csv') #df_test=df_test.drop(['Unnamed: 0'], axis=1) x_test=df_test.values[:,:30] preds=model.predict(x_test) df_test['Loss_or_Not']=preds df_test_reg=df_test.loc[df_test['Loss_or_Not']==1] x_test_reg=df_test_reg.values[:,:30] preds_reg = xg_reg.predict(x_test_reg) df_test_reg['Loss']=preds_reg df_test=df_test.loc[df_test['Loss_or_Not']==0] df_test['Loss']=0 df_test=df_test.append(df_test_reg) #df_test['Annual_Premium'].sum() #df_test['Loss'].sum() loss_ratio=df_test['Loss'].sum()/df_test['Annual_Premium'].sum() #math.log(loss_ratio) final_portfolio=final_portfolio.append({'ID':'portfolio_'+str(x), 'ln_LR':math.log(loss_ratio)},ignore_index=True) except: error.append(x) pass final_portfolio.to_csv('result.csv',index=False)
def getNDCG(ranklist, gtItem):
    for i in range(len(ranklist)):
        item = ranklist[i]
        if item == gtItem:
            return math.log(2) / math.log(i + 2)
    return 0
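# With a single relevant item, NDCG reduces to 1/log2(rank + 1), which is what
# the loop above returns (math.log(2)/math.log(i+2) == 1/log2(i+2)). A quick
# check with a made-up ranked list:
import math

ranklist = [10, 42, 7, 3]
print(getNDCG(ranklist, 10))   # hit at position 0 -> 1.0
print(getNDCG(ranklist, 7))    # hit at position 2 -> 1/log2(4) = 0.5
print(getNDCG(ranklist, 99))   # not in the list  -> 0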
def MQGetPercentage(self, rs_ro_ratio, pcurve):
    return (math.pow(10, (((math.log(rs_ro_ratio) - pcurve[1]) / pcurve[2]) + pcurve[0])))
def corpus_bleu( list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25), smoothing_function=None, auto_reweigh=False, ): """ Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all the hypotheses and their respective references. Instead of averaging the sentence level BLEU scores (i.e. marco-average precision), the original BLEU metric (Papineni et al. 2002) accounts for the micro-average precision (i.e. summing the numerators and denominators for each hypothesis-reference(s) pairs before the division). >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which', ... 'ensures', 'that', 'the', 'military', 'always', ... 'obeys', 'the', 'commands', 'of', 'the', 'party'] >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that', ... 'ensures', 'that', 'the', 'military', 'will', 'forever', ... 'heed', 'Party', 'commands'] >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which', ... 'guarantees', 'the', 'military', 'forces', 'always', ... 'being', 'under', 'the', 'command', 'of', 'the', 'Party'] >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the', ... 'army', 'always', 'to', 'heed', 'the', 'directions', ... 'of', 'the', 'party'] >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was', ... 'interested', 'in', 'world', 'history'] >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history', ... 'because', 'he', 'read', 'the', 'book'] >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]] >>> hypotheses = [hyp1, hyp2] >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS 0.5920... The example below show that corpus_bleu() is different from averaging sentence_bleu() for hypotheses >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1) >>> score2 = sentence_bleu([ref2a], hyp2) >>> (score1 + score2) / 2 # doctest: +ELLIPSIS 0.6223... :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses :type list_of_references: list(list(list(str))) :param hypotheses: a list of hypothesis sentences :type hypotheses: list(list(str)) :param weights: weights for unigrams, bigrams, trigrams and so on :type weights: list(float) :param smoothing_function: :type smoothing_function: SmoothingFunction :param auto_reweigh: Option to re-normalize the weights uniformly. :type auto_reweigh: bool :return: The corpus-level BLEU score. :rtype: float """ # Before proceeding to compute BLEU, perform sanity checks. p_numerators = Counter() # Key = ngram order, and value = no. of ngram matches. p_denominators = Counter() # Key = ngram order, and value = no. of ngram in ref. hyp_lengths, ref_lengths = 0, 0 assert len(list_of_references) == len(hypotheses), ( "The number of hypotheses and their reference(s) should be the " "same " ) # Iterate through each hypothesis and their corresponding references. for references, hypothesis in zip(list_of_references, hypotheses): # For each order of ngram, calculate the numerator and # denominator for the corpus-level modified precision. for i, _ in enumerate(weights, start=1): p_i = modified_precision(references, hypothesis, i) p_numerators[i] += p_i.numerator p_denominators[i] += p_i.denominator # Calculate the hypothesis length and the closest reference length. # Adds them to the corpus-level hypothesis and reference counts. hyp_len = len(hypothesis) hyp_lengths += hyp_len ref_lengths += closest_ref_length(references, hyp_len) # Calculate corpus-level brevity penalty. 
bp = brevity_penalty(ref_lengths, hyp_lengths) # Uniformly re-weighting based on maximum hypothesis lengths if largest # order of n-grams < 4 and weights is set at default. if auto_reweigh: if hyp_lengths < 4 and weights == (0.25, 0.25, 0.25, 0.25): weights = (1 / hyp_lengths,) * hyp_lengths # Collects the various precision values for the different ngram orders. p_n = [ Fraction(p_numerators[i], p_denominators[i], _normalize=False) for i, _ in enumerate(weights, start=1) ] # Returns 0 if there's no matching n-grams # We only need to check for p_numerators[1] == 0, since if there's # no unigrams, there won't be any higher order ngrams. if p_numerators[1] == 0: return 0 # If there's no smoothing, set use method0 from SmoothinFunction class. if not smoothing_function: smoothing_function = SmoothingFunction().method0 # Smoothen the modified precision. # Note: smoothing_function() may convert values into floats; # it tries to retain the Fraction object as much as the # smoothing method allows. p_n = smoothing_function( p_n, references=references, hypothesis=hypothesis, hyp_len=hyp_lengths ) s = (w_i * math.log(p_i) for w_i, p_i in zip(weights, p_n)) s = bp * math.exp(math.fsum(s)) return s
from math import log

from src.prune_seqs import prune_seqs
from src.fmt_read_id import fmt_read_id
from src.filesystem import OPEN_FUNCS, FORMATTING_FUNCS, is_gzipped
from src.printlog import printlog_warning

FASTQ_LINES_PER_READ = 4

# Function for getting Q value from Phred33 character:
substr_phred33 = lambda q_symb: ord(q_symb) - 33

# List of probabilities corresponding to indices (index is Q, value is the probability):
q2p_map = [10 ** (-q / 10) for q in range(128)]  # 127 -- max value of a signed byte

# Function for accessing probabilities by Q:
qual2prop = lambda q: q2p_map[q]

# Function for accessing Q by probability:
prop2qual = lambda p: round(-10 * log(p, 10), 2)


def get_read_avg_qual(qual_str):
    # Function calculates mean quality of a single read.
    # :param qual_str: read's quality line in Phred33;
    # :type qual_str: str;

    quals = map(substr_phred33, qual_str)   # get Qs
    err_props = map(qual2prop, quals)       # convert Qs to probabilities
    avg_err_prop = sum(err_props) / len(qual_str)  # calculate average probability

    return prop2qual(avg_err_prop)
# end def get_read_avg_qual


def form_packet_numseqs(fastq_file, packet_size, fmt_func, max_seq_len):
#args3['str-init'] = [-40.0] #args3[('str-init', 'reset-optimizer', 'memory-efficient-fp16')] = [(-10.0, True, True)] args3['str-init'] = [-10.0, -7.5] args3['str-sparse'] = [False] args3['str-lr'] = [0.00, 0.001] args3['str-mult-offset'] = [0.01] args3[('str-noisy-relu', 'str-weight-noise')] = [(0.1, 0.0), (0.2, 0.0), (0.3, 0.0)] args3[('max-update', 'warmup-updates', '')] = [(16000, 3000, ' /private/home/timdettmers/data/cc_small')] args3['weight-decay'] = [0.00] key = ('lr', 'warmup-init-lr') args3[key] = [] for params in [1e5]: lr = 0.003239 + (-0.0001395*math.log(params)) args3[key].append((lr, lr*0.1)) args4 = [] args5 = {} args6 = {} rdm = np.random.RandomState(5345) for key, value in args2.items(): cmd = cmd + ' --{0} {1}'.format(key, value) args_prod = [] for key, values in args3.items(): if isinstance(key, tuple):
def log(o):
    if hasattr(o, "__log__"):
        return o.__log__()
    return math.log(o)
def b_spline_nurbs(x, y, z, fname_centerline=None, degree=3, point_number=3000, nbControl=-1, verbose=1, all_slices=True, path_qc='.'): """ 3D B-Spline function :param x: :param y: :param z: :param fname_centerline: :param degree: :param point_number: :param nbControl: :param verbose: :param all_slices: :param path_qc: :return: """ from math import log twodim = False if z is None: twodim = True """x.reverse() y.reverse() z.reverse()""" logger.info('Fitting centerline using B-spline approximation') if not twodim: data = [[x[n], y[n], z[n]] for n in range(len(x))] else: data = [[x[n], y[n]] for n in range(len(x))] # if control_points == 0: # nurbs = NURBS(degree, point_number, data) # BE very careful with the spline order that you choose : if order is too high ( > 4 or 5) you need to set a higher number of Control Points (cf sct_nurbs ). For the third argument (number of points), give at least len(z_centerline)+500 or higher # else: # sct.printv('In b_spline_nurbs we get control_point = ', control_points) # nurbs = NURBS(degree, point_number, data, False, control_points) if nbControl == -1: centerlineSize = getSize(x, y, z, fname_centerline) nbControl = 30 * log(centerlineSize, 10) - 42 nbControl = np.round(nbControl) nurbs = NURBS(degree, point_number, data, False, nbControl, verbose, all_slices=all_slices, twodim=twodim) if not twodim: P = nurbs.getCourbe3D() x_fit = P[0] y_fit = P[1] z_fit = P[2] Q = nurbs.getCourbe3D_deriv() x_deriv = Q[0] y_deriv = Q[1] z_deriv = Q[2] else: P = nurbs.getCourbe2D() x_fit = P[0] y_fit = P[1] Q = nurbs.getCourbe2D_deriv() x_deriv = Q[0] y_deriv = Q[1] """x_fit = x_fit[::-1] y_fit = x_fit[::-1] z_fit = x_fit[::-1] x_deriv = x_fit[::-1] y_deriv = x_fit[::-1] z_deriv = x_fit[::-1]""" if verbose == 2: # TODO qc PC = nurbs.getControle() PC_x = [p[0] for p in PC] PC_y = [p[1] for p in PC] if not twodim: PC_z = [p[2] for p in PC] import matplotlib matplotlib.use('Agg') # prevent display figure import matplotlib.pyplot as plt if not twodim: plt.figure(1) #ax = plt.subplot(211) plt.subplot(211) plt.plot(z, x, 'r.') plt.plot(z_fit, x_fit) plt.plot(PC_z, PC_x, 'go') # ax.set_aspect('equal') plt.xlabel('z') plt.ylabel('x') plt.legend(["centerline", "NURBS", "control points"]) #ay = plt.subplot(212) plt.subplot(212) plt.plot(z, y, 'r.') plt.plot(z_fit, y_fit) plt.plot(PC_z, PC_y, 'go') # ay.set_aspect('equal') plt.xlabel('z') plt.ylabel('y') plt.legend(["centerline", "NURBS", "control points"],loc=4) # plt.show() else: plt.figure(1) plt.plot(y, x, 'r.') plt.plot(y_fit, x_fit) plt.plot(PC_y, PC_x, 'go') # ax.set_aspect('equal') plt.xlabel('y') plt.ylabel('x') plt.legend(["centerline", "NURBS", "control points"]) # plt.show() plt.savefig(os.path.join(path_qc, 'fig_b_spline_nurbs.png')) plt.close() if not twodim: return x_fit, y_fit, z_fit, x_deriv, y_deriv, z_deriv, nurbs.error_curve_that_last_worked else: return x_fit, y_fit, x_deriv, y_deriv, nurbs.error_curve_that_last_worked
n = int(input("Digite a quantidade de números: ")) sn = 0 i = 0 while i < n: s = float(input("Digite um número: ")) sn += s i = i + 1 M = sn / n print("A média dos números digitados é igual a {}".format(M)) if tipoCalculo == 9: print("---LOG---") b = float(input("Digite a base do log: ")) x = float(input("Digite um número: ")) lg = math.log(x, b) print("O log de {} na base {} é igual a {}".format(x, b, lg)) if tipoCalculo == 10: print("---SISTEMAS LINEARES---") A1 = int(input("Digite o valor que acompanha o 1º X: ")) B1 = int(input("Digite o valor que acompanha o 1º Y: ")) A2 = int(input("Digite o valor que acompanha o 2º X: ")) B2 = int(input("Digite o valor que acompanha o 2º Y: ")) C1 = int(input("Digite o valor que é resultante da 1ª equação: ")) C2 = int(input("Digite o valor que é resultante da 2ª equação: ")) import numpy as np A = np.array([[A1, B1], [A2, B2]]) B = np.array([[C1], [C2]])
def NOISE_FREE_RES(SPS, vref=2.048): # check operating mode current_op_mode = self.OPERATING_MODE() # check sample rate and translate to list location using the dictionary normal_sps_translate = { 20: 0, 45: 1, 90: 2, 175: 3, 330: 4, 600: 5, 1000: 6 } turbo_sps_translate = { 40: 0, 90: 1, 180: 2, 350: 3, 660: 4, 1200: 5, 2000: 6 } current_sps = self.DATA_RATE() if current_op_mode == 'turbo': translated_sps = turbo_sps_translate[current_sps] if current_op_mode == 'normal': translated_sps = normal_sps_translate[current_sps] # check gain and translate it to list loation current_gn = self.PGA() translated_gain = int(math.log(current_gn) / math.log(2)) # check pga status current_pga_status = self.PGA_ENABLED() # Set correct reference voltage for calculations # Check vref or use manual if provided if vref != -1: current_ref_voltage = vref else: current_ref = self.VOLTAGE_REF() if current_ref == 'internal': current_ref_voltage = 2.048 elif current_ref == 'analog_supply': current_ref_voltage = 3.3 # Select appropriate noise for current states # If the pga is disabled if current_pga_status == 0: # If turbo mode is enabled if current_op_mode == 'turbo': noise = CONST._TURBO_NOISE_PGA_DISABLED[translated_sps][ translated_gain][1] * (10**(-6)) # If normal mode is enabled else: noise = CONST._NORMAL_NOISE_PGA_DISABLED[translated_sps][ translated_gain][1] * (10**(-6)) # If the pga is enabled else: # If turbo mode is enabled if current_op_mode == 'turbo': noise = CONST._TURBO_NOISE[translated_sps][translated_gain][1] # If normal mode is enabled else: noise = CONST._NORMAL_NOISE[translated_sps][translated_gain][1] # Calculate and return the current effective resolution (uV RMS) res = math.log( (2 * current_ref_voltage) / (current_gn * noise)) / math.log(2) return res
def get_target(self, target, anchors, in_w, in_h, ignore_threshold): bs = len(target) anchor_index = [[0,1,2],[3,4,5],[6,7,8]][self.feature_length.index(in_w)] subtract_index = [0,3,6][self.feature_length.index(in_w)] mask = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) noobj_mask = torch.ones(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) tx = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) ty = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) tw = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) th = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) tconf = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) tcls = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, self.num_classes, requires_grad=False) box_loss_scale_x = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) box_loss_scale_y = torch.zeros(bs, int(self.num_anchors/3), in_h, in_w, requires_grad=False) for b in range(bs): if len(target[b])==0: continue gxs = target[b][:, 0:1] * in_w gys = target[b][:, 1:2] * in_h gws = target[b][:, 2:3] * in_w ghs = target[b][:, 3:4] * in_h gis = torch.floor(gxs) gjs = torch.floor(gys) gt_box = torch.FloatTensor(torch.cat([torch.zeros_like(gws), torch.zeros_like(ghs), gws, ghs], 1)) anchor_shapes = torch.FloatTensor(torch.cat((torch.zeros((self.num_anchors, 2)), torch.FloatTensor(anchors)), 1)) anch_ious = jaccard(gt_box, anchor_shapes) best_ns = torch.argmax(anch_ious,dim=-1) for i, best_n in enumerate(best_ns): if best_n not in anchor_index: continue gi = gis[i].long() gj = gjs[i].long() gx = gxs[i] gy = gys[i] gw = gws[i] gh = ghs[i] if (gj < in_h) and (gi < in_w): best_n = best_n - subtract_index noobj_mask[b, best_n, gj, gi] = 0 mask[b, best_n, gj, gi] = 1 tx[b, best_n, gj, gi] = gx - gi.float() ty[b, best_n, gj, gi] = gy - gj.float() tw[b, best_n, gj, gi] = math.log(gw / anchors[best_n+subtract_index][0]) th[b, best_n, gj, gi] = math.log(gh / anchors[best_n+subtract_index][1]) box_loss_scale_x[b, best_n, gj, gi] = target[b][i, 2] box_loss_scale_y[b, best_n, gj, gi] = target[b][i, 3] tconf[b, best_n, gj, gi] = 1 tcls[b, best_n, gj, gi, int(target[b][i, 4])] = 1 else: print('Step {0} out of bound'.format(b)) print('gj: {0}, height: {1} | gi: {2}, width: {3}'.format(gj, in_h, gi, in_w)) continue return mask, noobj_mask, tx, ty, tw, th, tconf, tcls, box_loss_scale_x, box_loss_scale_y
def create_feature(data, uni_seen_list, bi_seen_list, tri_seen_list, i_num, doUpdate): # unigram process uni_old_list = copy.deepcopy(uni_seen_list) bi_old_list = copy.deepcopy(bi_seen_list) tri_old_list = copy.deepcopy(tri_seen_list) uni_old_freq_list = copy.deepcopy(uni_seen_freq_list) bi_old_freq_list = copy.deepcopy(bi_seen_freq_list) num_uni = len(data) num_uni_unseen = 0 freq_uni_seen = 0 for uni in data: freq_uni_seen += uni_old_freq_list[uni] if not uni in uni_old_list: num_uni_unseen += 1 if doUpdate: uni_seen_list.append(uni) if doUpdate: uni_seen_freq_list[uni] += 1 prop_uni_unseen = num_uni_unseen / num_uni # proportion of unseen unigram words mean_freq_uni = freq_uni_seen / num_uni # print (mean_freq_uni) # bigram process num_bi = len(data) - 1 num_bi_unseen = 0 freq_bi_seen = 0 for i in range(num_bi): bi = list(data[i:i + 2]) freq_bi_seen += bi_old_freq_list[bi[0], bi[1]] # print (freq_bi_seen) if not bi in bi_old_list: num_bi_unseen += 1 if doUpdate: bi_seen_list.append(bi) if doUpdate: bi_seen_freq_list[bi[0]][bi[1]] += 1 prop_bi_unseen = num_bi_unseen / num_bi # proportion of unseen bigram words mean_freq_bi = freq_bi_seen / num_bi # trigram process num_tri = len(data) - 2 num_tri_unseen = 0 for i in range(num_tri): tri = list(data[i:i + 3]) if not tri in tri_old_list: num_tri_unseen += 1 if doUpdate: tri_seen_list.append(tri) # tri_seen_freq_list[tri[0]][tri[1]][tri[2]] += 1 prop_tri_unseen = num_tri_unseen / num_tri # proportion of unseen trigram words # Frequency # print (np.sum(bi_seen_freq_list)) # create tensor variable input_feature = torch.Tensor( np.array([ prop_uni_unseen, prop_bi_unseen, prop_tri_unseen, mean_freq_uni, mean_freq_bi, math.log(i_num + 1) ])) input_feature = input_feature.view(-1, 6) return input_feature
def _snr(self, frames):
    rms = audioop.rms(b''.join(frames), int(self._input_bits / 8))
    if rms > 0 and self._threshold > 0:
        return 20.0 * math.log(rms / self._threshold, 10)
    else:
        return 0
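# _snr returns the signal level in decibels relative to the threshold,
# 20*log10(rms/threshold), so an RMS ten times the threshold reads +20 dB.
# Standalone illustration of the same formula (values are made up):
import math

threshold = 100.0
for rms in (100, 1000, 50):
    print(20.0 * math.log(rms / threshold, 10))   # 0.0, 20.0, ~-6.02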
def gen_password(entropy_bits):
    base_entropy = math.log(len(chars)) / math.log(2)   # bits of entropy per character
    # Round up so the password carries at least entropy_bits bits of entropy.
    nchars = int(math.ceil(entropy_bits / base_entropy))
    return ''.join([random.choice(chars) for i in range(nchars)])
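# Example: with a 62-character alphabet each character contributes
# log2(62) ~ 5.95 bits, so 128 bits of entropy needs ceil(128 / 5.95) = 22
# characters. `chars` is assumed to be a module-level alphabet; the one below
# is only for illustration, and for real passwords random.SystemRandom (or
# the secrets module) is preferable to the default PRNG.
import math
import random
import string

chars = string.ascii_letters + string.digits   # 62 symbols (assumed alphabet)
print(gen_password(128))        # e.g. 'V3pQ...' with 22 characters
print(len(gen_password(128)))   # 22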
import math
from collections import Counter
from fractions import Fraction

# Helper functions _modified_precision(), _closest_ref_length() and
# _brevity_penalty() are defined elsewhere in this module.


def corpus_bleu(list_of_references, hypotheses, weights=(0.25, 0.25, 0.25, 0.25),
                smoothing_function=None):
    """
    Calculate a single corpus-level BLEU score (aka. system-level BLEU) for all
    the hypotheses and their respective references.

    Instead of averaging the sentence level BLEU scores (i.e. macro-average
    precision), the original BLEU metric (Papineni et al. 2002) accounts for
    the micro-average precision (i.e. summing the numerators and denominators
    for each hypothesis-reference(s) pair before the division).

    >>> hyp1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
    ...         'ensures', 'that', 'the', 'military', 'always',
    ...         'obeys', 'the', 'commands', 'of', 'the', 'party']
    >>> ref1a = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
    ...          'ensures', 'that', 'the', 'military', 'will', 'forever',
    ...          'heed', 'Party', 'commands']
    >>> ref1b = ['It', 'is', 'the', 'guiding', 'principle', 'which',
    ...          'guarantees', 'the', 'military', 'forces', 'always',
    ...          'being', 'under', 'the', 'command', 'of', 'the', 'Party']
    >>> ref1c = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
    ...          'army', 'always', 'to', 'heed', 'the', 'directions',
    ...          'of', 'the', 'party']

    >>> hyp2 = ['he', 'read', 'the', 'book', 'because', 'he', 'was',
    ...         'interested', 'in', 'world', 'history']
    >>> ref2a = ['he', 'was', 'interested', 'in', 'world', 'history',
    ...          'because', 'he', 'read', 'the', 'book']

    >>> list_of_references = [[ref1a, ref1b, ref1c], [ref2a]]
    >>> hypotheses = [hyp1, hyp2]
    >>> corpus_bleu(list_of_references, hypotheses) # doctest: +ELLIPSIS
    0.5520...

    The example below shows that corpus_bleu() is different from averaging
    sentence_bleu() over the hypotheses:

    >>> score1 = sentence_bleu([ref1a, ref1b, ref1c], hyp1)
    >>> score2 = sentence_bleu([ref2a], hyp2)
    >>> (score1 + score2) / 2 # doctest: +ELLIPSIS
    0.6223...

    :param list_of_references: a corpus of lists of reference sentences, w.r.t. hypotheses
    :type list_of_references: list(list(list(str)))
    :param hypotheses: a list of hypothesis sentences
    :type hypotheses: list(list(str))
    :param weights: weights for unigrams, bigrams, trigrams and so on
    :type weights: list(float)
    :return: The corpus-level BLEU score.
    :rtype: float
    """
    p_numerators = Counter()    # Key = ngram order, value = no. of ngram matches.
    p_denominators = Counter()  # Key = ngram order, value = no. of ngrams in ref.
    hyp_lengths, ref_lengths = 0, 0

    assert len(list_of_references) == len(hypotheses), \
        "The number of hypotheses and their reference(s) should be the same"

    # Iterate through each hypothesis and its corresponding references.
    for references, hypothesis in zip(list_of_references, hypotheses):
        # For each order of ngram, calculate the numerator and
        # denominator for the corpus-level modified precision.
        for i, _ in enumerate(weights, start=1):
            p_i = _modified_precision(references, hypothesis, i)
            p_numerators[i] += p_i.numerator
            p_denominators[i] += p_i.denominator

        # Calculate the hypothesis length and the closest reference length,
        # and add them to the corpus-level hypothesis and reference counts.
        hyp_len = len(hypothesis)
        hyp_lengths += hyp_len
        ref_lengths += _closest_ref_length(references, hyp_len)

    # Calculate the corpus-level brevity penalty.
    bp = _brevity_penalty(ref_lengths, hyp_lengths)

    # Collect the precision values for the different ngram orders.
    p_n = [Fraction(p_numerators[i], p_denominators[i])
           for i, _ in enumerate(weights, start=1)]

    # Smoothen the modified precision.
    # Note: smoothing_function() converts values into float.
    if smoothing_function:
        p_n = smoothing_function(p_n, references=references,
                                 hypothesis=hypothesis, hyp_len=hyp_len)

    # Calculate the overall modified precision for all ngrams
    # by summing the products of the weights and the respective log *p_n*.
    s = (w * math.log(p_i) if p_i else 0 for w, p_i in zip(weights, p_n))
    return bp * math.exp(math.fsum(s))
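# The final line of corpus_bleu() computes a weighted geometric mean of the
# n-gram precisions in log space. A minimal sketch with made-up precision
# values, showing that exp(sum(w * log(p))) equals prod(p ** w):
import math

demo_weights = (0.25, 0.25, 0.25, 0.25)
demo_p_n = [0.8, 0.6, 0.4, 0.3]          # hypothetical modified precisions p_1..p_4

log_space = math.exp(math.fsum(w * math.log(p) for w, p in zip(demo_weights, demo_p_n)))
direct = 1.0
for w, p in zip(demo_weights, demo_p_n):
    direct *= p ** w
print(round(log_space, 6) == round(direct, 6))   # True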
import math


def get_sum(target, l):
    # Sum the descendants of `target`: in the preorder traversal they are the
    # 2 ** (level - l) - 2 values that immediately follow the node itself.
    ind = preIndex.index(target)
    n = 2 ** (level - l) - 2
    result = 0
    for i in range(1, n + 1):
        result += preIndex[ind + i]
    return result


# Preorder and in-order traversals of a complete binary tree.
preIndex = [int(i) for i in input().strip().split(" ")]
middleIndex = [int(i) for i in input().strip().split(" ")]
level = int(math.log(len(middleIndex) + 1, 2))

# Collect the in-order positions of the internal nodes, level by level,
# starting from the root in the middle of the in-order sequence.
fathers = [[int(len(middleIndex) / 2)]]
for i in range(level - 2):
    newL = []
    differ = 2 ** (level - 2 - i)
    for k in fathers[i]:
        newL.append(k - differ)
        newL.append(k + differ)
    fathers.append(newL)

# Replace each internal node with the sum of its descendants,
# then zero out the leaves (the even in-order positions).
for i in range(len(fathers)):
    for j in fathers[i]:
        middleIndex[j] = get_sum(middleIndex[j], i)
for i in range(len(middleIndex)):
    if i % 2 == 0:
        middleIndex[i] = 0
print(*middleIndex, end="")
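# get_sum() above appears to rely on a property of preorder traversals: the
# 2 ** (level - l) - 2 entries that immediately follow a depth-l node are
# exactly its descendants. A tiny sketch with a made-up 3-level perfect tree:
preorder = [4, 2, 1, 3, 6, 5, 7]   # tree: 4 -> (2 -> 1, 3), (6 -> 5, 7)

root_pos = preorder.index(4)       # depth 0: 2**3 - 2 = 6 descendants follow
print(sum(preorder[root_pos + 1: root_pos + 1 + 6]))   # 2+1+3+6+5+7 = 24

pos = preorder.index(2)            # depth 1: 2**2 - 2 = 2 descendants follow
print(sum(preorder[pos + 1: pos + 1 + 2]))             # 1 + 3 = 4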
import math


def edit_function():
    # Number of address bits minus the bits needed to cover dist();
    # `g` and `dist` are provided by the surrounding environment.
    addr_space = int(g.get_property("addr_space"))
    return int(math.log(addr_space, 2) - math.log(dist(), 2))
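# The return value above is just log2(addr_space / dist()): how many doublings
# separate the two quantities. A minimal sketch with made-up numbers (the real
# `g` and `dist()` come from the host tool):
import math

addr_space = 2 ** 32   # hypothetical 32-bit address space
dist = 2 ** 12         # hypothetical distance of 4 KiB

print(round(math.log(addr_space, 2) - math.log(dist, 2)))   # 20
print(round(math.log(addr_space / dist, 2)))                # the same: 20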
n = 0
# Give every tag a small probability mass for unknown words.
for tag in Pwgt:
    Pwgt[tag][unknown] = .1

# Most frequent tag by emission counts.
for tag in tags:
    if sum(Pwgt[tag].values()) > n:
        mfreq_tag = tag
        n = sum(Pwgt[tag].values())

n = 0
# Most frequent tag by transition counts.
for tag in tags:
    if sum(Ptgt[tag].values()) > n:
        mfreq_taggt = tag
        n = sum(Ptgt[tag].values())

# Normalize emission counts into log probabilities: log P(word | tag).
for tag in Pwgt:
    obs = sum(Pwgt[tag].values())
    for word in Pwgt[tag]:
        Pwgt[tag][word] = math.log(Pwgt[tag][word]) - math.log(obs)

# Normalize transition counts into log probabilities: log P(current tag | previous tag).
for ptag in Ptgt:
    obs = sum(Ptgt[ptag].values())
    for ctag in Ptgt[ptag]:
        Ptgt[ptag][ctag] = math.log(Ptgt[ptag][ctag]) - math.log(obs)

start = "START"
end = "END"
keys = list(Ptgt.keys())
keys.remove(start)
keys.append(end)

# First, we make an FSA that utilizes P(T|T)
f = open("ptgt.fst.txt", "w")
for tag in Ptgt[start]:
    f.write("{} {} {} {}\n".format(0, keys.index(tag) + 1, tag, -Ptgt[start][tag]))
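# A self-contained sketch of the same normalization idea with a made-up count
# table (the real Pwgt/Ptgt are built earlier in the script): counts become
# log probabilities, and each FST arc carries the negated log probability.
import math

counts = {"DT": 50, "NN": 30, "VB": 20}      # hypothetical transitions out of START
total = sum(counts.values())
log_probs = {tag: math.log(c) - math.log(total) for tag, c in counts.items()}

for tag, lp in sorted(log_probs.items()):
    print(tag, -lp)                          # e.g. DT 0.693..., NN 1.203..., VB 1.609...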
from random import randint
from math import log, ceil

RandomNumbers = []
RandMaxNumber = int(input('Enter the array size\n'))
# Fill the first RandMaxNumber slots with random values, then pad the list
# with zeros up to the next power of two.
for i in range(1 << int(ceil(log(RandMaxNumber, 2)))):
    if i < RandMaxNumber:
        RandomNumbers.append(randint(1, 10000))
    else:
        RandomNumbers.append(0)  # fill with zeros

print('Random numbers')
print(' '.join(str(x) for x in RandomNumbers))
print('Number of elements in the list: {}'.format(len(RandomNumbers)))
# print('A single random number - {}'.format(randint(1, 10000)))
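# Rounding a float log is fragile near exact powers of two; an integer-only
# alternative (not used in the script above) pads to the next power of two
# with bit_length():
def next_power_of_two(n):
    # Smallest power of two >= n, computed without floating point.
    return 1 << (n - 1).bit_length()

for n in (1, 7, 8, 9, 1000):
    print(n, next_power_of_two(n))   # 1 1, 7 8, 8 8, 9 16, 1000 1024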
def sentence_bleu(references, hypothesis, weights=(0.25, 0.25, 0.25, 0.25),
                  smoothing_function=None):
    """
    Calculate BLEU score (Bilingual Evaluation Understudy) from
    Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.
    "BLEU: a method for automatic evaluation of machine translation."
    In Proceedings of ACL. http://www.aclweb.org/anthology/P02-1040.pdf

    >>> hypothesis1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'which',
    ...                'ensures', 'that', 'the', 'military', 'always',
    ...                'obeys', 'the', 'commands', 'of', 'the', 'party']

    >>> hypothesis2 = ['It', 'is', 'to', 'insure', 'the', 'troops',
    ...                'forever', 'hearing', 'the', 'activity', 'guidebook',
    ...                'that', 'party', 'direct']

    >>> reference1 = ['It', 'is', 'a', 'guide', 'to', 'action', 'that',
    ...               'ensures', 'that', 'the', 'military', 'will', 'forever',
    ...               'heed', 'Party', 'commands']

    >>> reference2 = ['It', 'is', 'the', 'guiding', 'principle', 'which',
    ...               'guarantees', 'the', 'military', 'forces', 'always',
    ...               'being', 'under', 'the', 'command', 'of', 'the',
    ...               'Party']

    >>> reference3 = ['It', 'is', 'the', 'practical', 'guide', 'for', 'the',
    ...               'army', 'always', 'to', 'heed', 'the', 'directions',
    ...               'of', 'the', 'party']

    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1) # doctest: +ELLIPSIS
    0.5045...

    >>> sentence_bleu([reference1, reference2, reference3], hypothesis2) # doctest: +ELLIPSIS
    0.3969...

    The default BLEU calculates a score for up to 4-grams using uniform
    weights. To evaluate your translations with higher/lower order ngrams,
    use customized weights. E.g. when accounting for up to 5-grams with
    uniform weights:

    >>> weights = (0.1666, 0.1666, 0.1666, 0.1666, 0.1666)
    >>> sentence_bleu([reference1, reference2, reference3], hypothesis1, weights)
    0.45838627164939455

    :param references: reference sentences
    :type references: list(list(str))
    :param hypothesis: a hypothesis sentence
    :type hypothesis: list(str)
    :param weights: weights for unigrams, bigrams, trigrams and so on
    :type weights: list(float)
    :return: The sentence-level BLEU score.
    :rtype: float
    """
    # Calculate the brevity penalty.
    # *hyp_len* is referred to as *c* in Papineni et al. (2002).
    hyp_len = len(hypothesis)
    # *closest_ref_len* is referred to as *r* in Papineni et al. (2002).
    closest_ref_len = _closest_ref_length(references, hyp_len)
    bp = _brevity_penalty(closest_ref_len, hyp_len)

    # Calculate the modified precision *p_n* for each order of ngram.
    p_n = [_modified_precision(references, hypothesis, i)
           for i, _ in enumerate(weights, start=1)]

    # Smoothen the modified precision.
    # Note: smoothing_function() converts values into float.
    if smoothing_function:
        p_n = smoothing_function(p_n, references=references,
                                 hypothesis=hypothesis, hyp_len=hyp_len)

    # Calculate the overall modified precision for all ngrams
    # by summing the products of the weights and the respective log *p_n*.
    s = (w * math.log(p_i) if p_i else 0 for w, p_i in zip(weights, p_n))
    sum_s = math.fsum(s)
    # If any precision is zero, the zero terms contribute nothing to the
    # log-sum; return 0 rather than bp * exp(0).
    if sum_s == 0 and not all(p_n):
        return 0
    return bp * math.exp(sum_s)
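# Why sentence_bleu() needs the final guard: when every p_n is zero, each term
# contributes 0 to the log-sum, so without the check the score would collapse
# to bp * exp(0) = bp instead of 0. A quick numeric illustration (made-up values):
import math

bp = 0.9
weights = (0.25, 0.25, 0.25, 0.25)
p_n = [0, 0, 0, 0]                    # no n-gram overlap at any order

s = [w * math.log(p) if p else 0 for w, p in zip(weights, p_n)]
print(bp * math.exp(math.fsum(s)))    # 0.9 -- misleading without the guard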