def MJD_string2longdouble(s):
    """
    MJD_string2longdouble(s):
        Convert a MJD string to a numpy longdouble
    """
    ii, ff = s.split(".")
    return np.longfloat(ii) + np.longfloat("0." + ff)
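# A minimal usage sketch for MJD_string2longdouble (the MJD string below is
# made up, and the function above is assumed to be in scope with numpy
# imported as np). Splitting at the decimal point keeps the day number and
# the fractional day from competing for the same float64 mantissa bits.
mjd = MJD_string2longdouble("58849.000000000123456")
print(mjd, type(mjd))  # a numpy long double scalar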
def update(self, orderbook, max_obs_size):
    """Return an updated OrderBookSeries.

    Appends the most recent state and removes the oldest one from the
    OrderBookSeries if the size limit is exceeded, keeping memory
    constraints in check.
    """
    # Reverse bids so that both tops are the logical next in a deck
    depth = 25
    data = np.full(depth, np.nan,
                   dtype=[('Pask', '<f16'), ('Qask', '<f16'),
                          ('Pbid', '<f16'), ('Qbid', '<f16')])
    if len(orderbook.asks) + len(orderbook.bids):
        for idx, ask in enumerate(orderbook.asks):
            if idx == depth:
                break
            data['Pask'][idx] = np.longfloat(ask.price.amount)
            data['Qask'][idx] = np.longfloat(ask.amount.amount)
        for idx, bid in enumerate(orderbook.bids[::-1]):
            if idx == depth:
                break
            data['Pbid'][idx] = np.longfloat(bid.price.amount)
            data['Qbid'][idx] = np.longfloat(bid.amount.amount)
    # Build order book series data
    new_obs = OrderBookSeries(
        np.append(self.t, np.datetime64(time.time_ns(), "ns")),
        np.append(self.data, data).reshape(self.data.shape[0] + 1, depth))
    # Drop old data if at the size limit
    if new_obs.t.shape[0] <= max_obs_size:
        return new_obs
    return OrderBookSeries(new_obs.t[1:], new_obs.data[1:])
def read_solution(path, order=None, size=10, limiting=False, advection=False, dimensions=2):
    reader = vtk.vtkXMLUnstructuredGridReader()
    reader.SetFileName(path)
    reader.Update()
    output = reader.GetOutput()
    n_points = reader.GetNumberOfPoints()
    nodes = output.GetPoints().GetData()
    po_d = output.GetPointData()
    time_idx = 3 if limiting else 1
    time = po_d.GetArray(time_idx).GetTuple(0)[0]
    number_of_data = int(po_d.GetArray(0).GetSize() / n_points)

    # Are we 2D or 3D?
    # Data is [rho, v, p, potT]
    number_of_data_2d = 6 if advection else 5
    number_of_coords = 2 if number_of_data == number_of_data_2d else 3
    store_refinement_status = (number_of_coords == 2)
    num_refinement_cols = 1 if store_refinement_status else 0

    data = np.zeros((n_points, 3 + number_of_data + num_refinement_cols)) * float('NaN')
    for i in range(n_points):
        if number_of_coords == 2:
            x, y, z = nodes.GetTuple(i)
            if limiting:
                refinement_status = np.longfloat(output.GetPointData().GetArray(1).GetTuple(i))
            else:
                refinement_status = 4
            if advection:
                rho, v_x, v_y, p, Z, potT = np.longfloat(output.GetPointData().GetArray(0).GetTuple(i))
                data[i, :] = np.array([x, y, z, rho, v_x, v_y, p, Z, potT, refinement_status])
            else:
                rho, v_x, v_y, p, potT = np.longfloat(output.GetPointData().GetArray(0).GetTuple(i))
                data[i, :] = np.array([x, y, z, rho, v_x, v_y, p, potT, refinement_status])
        else:
            assert not advection and not limiting
            x, y, z = nodes.GetTuple(i)
            rho, v_x, v_y, v_z, p, potT = np.longfloat(output.GetPointData().GetArray(0).GetTuple(i))
            data[i, :] = np.array([x, y, z, rho, v_x, v_y, v_z, p, potT])

    if number_of_coords == 2 and dimensions == 2:
        if advection:
            columns = ['x', 'y', 'z', 'rho', 'v_x', 'v_y', 'p', 'Z', 'potT', 'refinementStatus']
        else:
            columns = ['x', 'y', 'z', 'rho', 'v_x', 'v_y', 'p', 'potT', 'refinementStatus']
    else:
        columns = ['x', 'y', 'z', 'rho', 'v_x', 'v_y', 'v_z', 'p', 'potT']

    df = pd.DataFrame(data, columns=columns).dropna()
    if not order:
        # Find order from filename
        order = int(re.search(r'order_(\d)_', path).group(1))
    basis_size = order + 1
    dx = size / (n_points ** 0.5 / (order + 1))
    n_cells = int(10 / dx) ** 2
    return df, time, order, number_of_coords, dx, n_points
def printRelativeAbundances(self, outFile=None, datasets=[],
                            minimalMaxAbundance=0.0, normaliseToBase=False):
    datasetheaders = ""
    for dataset in datasets:
        datasetheaders += (dataset + "\t")
    datasetheaders = datasetheaders[:-1]

    if not outFile:
        print "Level\tTaxonpath\tTaxon\t%s" % datasetheaders
    else:
        outFile.write("Level\tTaxonpath\tTaxon\t%s\n" % datasetheaders)

    listedNodes = []
    for i in self.depths:
        nodes = self.getChildrenByRank(i)
        for node in nodes:
            if ("Unknown" in node.name) or (node in listedNodes):
                pass
            else:
                listedNodes.append(node)
                row = "%s\t%s\t%s" % (self.depths[i],
                                      node.getPhylogenyRDPStyle(),
                                      node.name)
                maxPop = 0.0
                for dataset in datasets:
                    if normaliseToBase:
                        rc = self.getNode("Cellular organisms")
                    else:
                        rc = self.root
                    all_assignments = rc.getAssignment(dataset)
                    if all_assignments:
                        rpop = all_assignments.population
                    else:
                        rpop = 0
                        sys.stderr.write("Warning: Dataset %s has no "
                                         "assignments\n" % dataset)
                    a = node.getAssignment(dataset)
                    if a:
                        npop = a.population
                    else:
                        npop = 0
                    try:
                        ra = (longfloat(npop) / longfloat(rpop))
                    except:
                        sys.stderr.write("Problem with longfloat dividing "
                                         "%s with %s\n" % (npop, rpop))
                        ra = float(npop / rpop)
                        sys.stderr.write("Using %s\n" % ra)
                    if ra > maxPop:
                        maxPop = ra
                    row += "\t%f" % ra
                if (maxPop >= minimalMaxAbundance):
                    if not outFile:
                        print(row)
                    else:
                        outFile.write(row + "\n")
def __processDenseMatrix(self, stream, matrixSize):
    self.depmtx = zeros(matrixSize, dtype=longfloat)
    line = stream.readline()
    match = self._getMatch(line, DEPMTX_REGEX, 'depletion matrix')
    while match:
        row, col = [int(xx) - 1 for xx in match.groups()[:2]]
        value = longfloat(match.groups()[2])
        self.depmtx[row, col] = longfloat(value)
        line = stream.readline()
        match = DEPMTX_REGEX.search(line)
    return line
def _complementary_belief(G, edge):
    # Aggregate belief score: 1 - prod(1 - belief_i)
    belief_list = [s['belief'] for s in G.edges[edge]['statements']]
    try:
        ag_belief = np.longfloat(1.0) - np.prod(np.fromiter(
            map(lambda belief: np.longfloat(1.0) - belief, belief_list),
            dtype=np.longfloat))
    except FloatingPointError as err:
        logger.warning('%s: Resetting ag_belief to 10*np.longfloat precision '
                       '(%.0e)' % (err, Decimal(NP_PRECISION * 10)))
        ag_belief = NP_PRECISION * 10
    return ag_belief
def ag_belief_score(belief_list):
    """Each item in `belief_list` should be a float"""
    # Aggregate belief score: 1 - prod(1 - belief_i)
    try:
        ag_belief = np.longfloat(1.0) - np.prod(
            np.fromiter(map(lambda belief: np.longfloat(1.0) - belief,
                            belief_list),
                        dtype=np.longfloat))
    except FloatingPointError as err:
        logger.warning('%s: Resetting ag_belief to 10*np.longfloat '
                       'precision (%.0e)' % (err, Decimal(NP_PRECISION * 10)))
        ag_belief = NP_PRECISION * 10
    return ag_belief
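# A small standalone check of the aggregation formula used above,
# 1 - prod(1 - belief_i): two supporting beliefs of 0.7 and 0.8 combine to
# 0.94. The values are illustrative only.
import numpy as np

beliefs = [0.7, 0.8]
combined = np.longfloat(1.0) - np.prod([np.longfloat(1.0) - b for b in beliefs])
print(combined)  # 0.94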
def gradient(x, y, w, batch_size, lamb, N):
    g = np.zeros(np.size(w))
    for i in range(np.size(w)):
        if w[i] > 0:
            g[i] = 1 * lamb * batch_size / N
        elif w[i] < 0:
            g[i] = -1 * lamb * batch_size / N
        else:
            g[i] = np.random.uniform(-1, 1) / batch_size
    for j in range(np.shape(x)[0]):
        g = g - y[j] * (x[j][:]).T * np.longfloat(
            np.exp(-y[j] * np.dot(w, x[j][:].T))) / (
                1 + np.longfloat(np.exp(-y[j] * np.dot(w, x[j][:].T))))
    return g
def LoadData(fileName, start, end):
    inputData = []
    outputData = []
    with open(fileName, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in itertools.islice(reader, start, end):
            output = np.longfloat(row[5])
            input = [
                np.longfloat(x)
                for x in itertools.islice(row, 5, len(row)) if x != ''
            ]  #len(row)
            inputData.append(input)
            outputData.append(output)
    return inputData, outputData
def ag_belief_score(belief_list):
    """Each item in `belief_list` should be a float"""
    # Aggregate belief score: 1 - prod(1 - belief_i)
    with np.errstate(all='raise'):
        try:
            ag_belief = np.longfloat(1.0) - np.prod(
                np.fromiter(map(lambda belief: np.longfloat(1.0) - belief,
                                belief_list),
                            dtype=np.longfloat))
        except FloatingPointError as err:
            logger.warning('%s: Resetting ag_belief to 10*np.longfloat '
                           'precision (%.0e)' % (err, Decimal(MIN_WEIGHT)))
            ag_belief = MIN_WEIGHT
    return ag_belief
def step(self, obs):
    obs_in = np.copy(obs.astype(np.float)).squeeze()
    if self._scale is not None:
        obs_in = ((obs_in - self._min) * 2 * np.ones([2]) / self._range) - np.ones([2])
    x = torch.from_numpy(obs_in.reshape(1, -1)).type(self._dtype)
    y = x.mm(self._weights[0]) + self._biases[0]
    for i in range(len(self._layer_sizes) - 2):
        if self._activation_function == 'relu':
            y = y.clamp(min=0)
        elif self._activation_function == 'tanh':
            y = np.tanh(y)
        elif self._activation_function == 'leakyrelu':
            y[y < 0] = 0.01 * y[y < 0]
        y = y.mm(self._weights[i + 1]) + self._biases[i + 1]
    y = y.numpy()
    y = np.tanh(np.longfloat(self._controller_tmp * y))
    self._action = y[0, :].astype(np.float64)
    if self._action.size == 1:
        self._action = np.array([self._action])
    self._action = np.clip(self._action, -1, 1)  # just in case..
    return self._action
def calculate_number_matrix_w(self, bias, epsilon_range):
    """Calculates the W matrix, required for self-consistency.

    Warning: assumes a left/right coupled lead!
    """
    number_rows = self.epsilon.shape[0]
    number_cols = self.distribution().shape[0]
    self.w_matrix = np.zeros((number_rows, number_cols))
    superset = self.generate_superset(0)

    fd = lambda epsilon: 1.0 / (1 + np.exp(np.longfloat(epsilon * self.beta)))

    left_green_integral = []
    right_green_integral = []
    for i in superset:
        retarded_lambda, advanced_lambda = self.singleparticlebackground(i)
        left_w_lambda = []
        right_w_lambda = []
        state = self.ket(i)
        for j in range(self.dim):
            if state[j] == 1.0:
                left_w_lambda.append(lambda epsilon: np.real(
                    fd(epsilon + 0.5 * bias) * retarded_lambda(epsilon).item(j, j)
                    * self.gamma * advanced_lambda(epsilon).item(j, j)))
                right_w_lambda.append(lambda epsilon: np.real(
                    fd(epsilon - 0.5 * bias) * retarded_lambda(epsilon).item(j, j)
                    * self.gamma * advanced_lambda(epsilon).item(j, j)))
        left_green_integral.append(left_w_lambda)
        right_green_integral.append(right_w_lambda)

    # Edit this loop so that it creates the integrals required for W.
    # It should be clear how to do this (for epsi -> correct linspace, etc.).
    # Upon reconsideration of the derivation leading to 3.13, a factor
    # \frac{\hbar}{\imath} seems to be missing. However, that factor hbar is
    # then absorbed into the fourier -> energy integral substitution.
    factor_w = 1. / (2. * np.pi)
    for i in superset:
        left_value = 0.0
        for lambda_fun in left_green_integral[i]:
            left_value += factor_w * np.trapz(
                [lambda_fun(er) for er in epsilon_range], epsilon_range)
        right_value = 0.0
        for lambda_fun in right_green_integral[i]:
            right_value += factor_w * np.trapz(
                [lambda_fun(er) for er in epsilon_range], epsilon_range)

        # Now, we need to iterate over the filled states in ket(i). Afterwards,
        # we go over the superset again and add the contribution for each
        # separate state.
        state = self.ket(i)
        for j in range(self.dim):
            if state[j] == 1.0:
                for beta in self.generate_superset(i):
                    betaState = self.ket(beta)
                    if betaState[j] == 1.0:
                        self.w_matrix[j, beta] += left_value + right_value
def drag_coe_Cp(self, data_name='Pressure'):
    """
    Calculate the pressure coefficient and the drag coefficient using the
    pressure-distribution method.

    Because of floating-point rounding in Python, some probe points end up
    inside the cylinder; the diameter is enlarged here (zoomed to 101%) to
    avoid that.
    :param data_name: field name to extract
    :return: theta in degrees and Cp, each as an array
    """
    ugrid = self.reader.GetOutputPort()
    self.D = 1.01 * self.D
    # Resolution of the probe
    resolution = 1000
    # Just the upper half of the surface
    delta_theta = np.longfloat(180) / resolution
    detector = []
    theta = []
    for i in range(resolution + 1):
        theta.append(delta_theta * i)
        x = self.c0[0] + self.D / 2 * np.longfloat(cos(radians(180 - delta_theta * i)))
        y = self.c0[1] + self.D / 2 * np.longfloat(sin(radians(180 - delta_theta * i)))
        z = self.c0[2]
        detector.append([x, y, z])

    points = vtk.vtkPoints()
    points.SetDataTypeToDouble()
    for i in range(len(detector)):
        points.InsertNextPoint(detector[i][0], detector[i][1], detector[i][2])
    detectors = vtk.vtkPolyData()
    detectors.SetPoints(points)

    probe = vtk.vtkProbeFilter()
    probe.SetInputConnection(ugrid)
    probe.SetSourceConnection(ugrid)
    probe.SetInputData(detectors)
    probe.Update()
    data = probe.GetOutput()

    FS = []
    for j in range(points.GetNumberOfPoints()):
        p = data.GetPointData().GetScalars(data_name).GetComponent(j, 0)
        FS.append((float(p) - self.p0) / (0.5 * self.rho0 * self.U0 ** 2))
    return np.array(theta), np.array(FS)
def Gaussian_DN(X, U_Mean, Cov):
    D = np.shape(X)[0]
    Y = X - U_Mean
    temp = Y.T * (Cov + np.eye(D) * 0.01).I * Y
    result = (1.0 / ((2 * np.pi) ** (D / 2))) * (
        1.0 / (np.linalg.det(Cov + np.eye(D) * 0.01) ** 0.5)) * np.longfloat(
            np.exp(-0.5 * temp))
    return result
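# An optional cross-check of Gaussian_DN against scipy (assuming np.matrix
# column vectors, as the code above implies); scipy.stats.multivariate_normal
# is used here only for verification and is not part of the original code.
import numpy as np
from scipy.stats import multivariate_normal

D = 2
X = np.matrix([[0.5], [1.0]])
U_Mean = np.matrix([[0.0], [0.0]])
Cov = np.matrix(np.eye(D))

mine = np.asarray(Gaussian_DN(X, U_Mean, Cov)).item()
ref = multivariate_normal(mean=np.asarray(U_Mean).ravel(),
                          cov=np.asarray(Cov + np.eye(D) * 0.01)).pdf(
                              np.asarray(X).ravel())
print(mine, ref)  # both evaluate the same regularised Gaussian density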
def sigmoid(inX):
    '''
    Sigmoid function, sigma(z) = 1 / (1 + np.exp(-z))
    :param inX:
    :return:
    '''
    # If inX is a vector or array, np.exp(-inX) operates element-wise,
    # so the result is again a vector or array.
    return np.longfloat(1.0 / (1 + np.exp(-inX)))
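# Usage note (an observation, not part of the original source): wrapping the
# result in np.longfloat does not widen np.exp itself, because np.exp(-inX)
# is still evaluated in the input dtype. Casting the argument first avoids
# the float64 overflow warning for very negative inputs, at least on
# platforms where np.longdouble is wider than float64.
import numpy as np

z = np.array([-800.0, 0.0, 800.0])
print(sigmoid(z))                                    # float64 exp overflows at z = -800
print(1.0 / (1 + np.exp(-z.astype(np.longdouble))))  # long double exp stays finite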
def is_longdouble_binary_compatible():
    try:
        one = np.frombuffer(
            b'\x00\x00\x00\x00\x00\x00\x00\x80\xff\x3f\x00\x00\x00\x00\x00\x00',
            dtype='<f16')
        return one == np.longfloat(1.)
    except TypeError:
        return False
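# A hedged note on where the 16-byte pattern above comes from: on x86-64
# Linux, np.longdouble is the 80-bit extended type padded to 16 bytes, and
# the little-endian bytes of 1.0 match the buffer being compared against
# (the padding bytes are not guaranteed). On platforms where long double is
# plain float64, '<f16' is not a valid dtype and the TypeError branch fires.
import numpy as np

print(np.longdouble(1.0).tobytes())
print(np.finfo(np.longdouble))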
def flux_density(wl, T, R, D):
    one_parsec = 3.0857e16
    conv_fac = ((1e3) ** 2 / (one_parsec ** 2)) * 1e26
    a = (2 * const.pi * const.speed_of_light * const.h) / wl ** 3
    b = np.longfloat((const.h * const.speed_of_light) / (wl * const.k * T))
    return (a * (1 / (np.exp(b) - 1)) * (R ** 2 / D ** 2)) * conv_fac
def evaluate(log_target_prop, log_target_prev):
    ratio = np.exp(np.longfloat(log_target_prop - log_target_prev))
    if (ratio > 1) or (np.random.uniform() < ratio):
        return True, ratio   # accepted
    else:
        return False, ratio  # rejected
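# A minimal Metropolis loop built on evaluate(), using a standard-normal log
# target and a symmetric Gaussian proposal; the target and proposal scale are
# illustrative assumptions, not part of the original code.
import numpy as np

log_target = lambda x: -0.5 * x ** 2
x, samples = 0.0, []
for _ in range(2000):
    x_prop = x + np.random.normal(scale=0.5)
    accepted, _ = evaluate(log_target(x_prop), log_target(x))
    if accepted:
        x = x_prop
    samples.append(x)
print(np.mean(samples), np.std(samples))  # should be roughly 0 and 1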
def difference(self):
    dataMean = np.mean(self.numMatrix)
    sum1 = 0
    for i in range(0, self.length):
        sum1 = sum1 + np.power(
            np.longfloat(self.numMatrix[i][0] - dataMean), 2)
    divide = sum1 / (self.length - 1)
    return np.sqrt(divide)
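# For reference (an observation, not from the original source): difference()
# is the sample standard deviation of the first column, so on a plain array
# it should agree with numpy's ddof=1 form.
import numpy as np

col = np.array([[1.0], [2.0], [4.0], [7.0]])
manual = np.sqrt(np.sum((col[:, 0] - col.mean()) ** 2) / (len(col) - 1))
print(manual, np.std(col[:, 0], ddof=1))  # both ~2.6458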
def __token_type_overlap(utt: pd.Series) -> np.longfloat:
    preceding_token_types = utt[
        TokenTypeOverlapColumn.PRECEDING_TOKEN_TYPES.value]
    if pd.isnull(preceding_token_types):
        result = np.longfloat(0.0)
    else:
        token_types = utt[TokenTypeOverlapColumn.TOKEN_TYPES.value]
        result = alignment_metrics.token_type_overlap_ratio(
            token_types, preceding_token_types)
    return result
def generatorspectra(spectype, mV, filename):
    """Takes as inputs: the spectral type and magnitude of an object, and the
    name of the file where all the parameters are. Generates a spectrum."""
    # import the parameters from the parameter file
    ffiltername, spectraltype, mV, startrange, endrange, arrsize, skyfile,\
        atmofile, telefile, pbfiltername, fudgefactor, expostime, diameter,\
        seeingvalue, quantumeff, startrange2, endrange2, vbessel, umag, gmag,\
        rmag, imag, zmag = reader(filename)
    # take into account the filter
    vf, fluxfilt = loadfilter(filename)

    # spectra in 1 Angstrom bins from 0 to 9999 Angstroms
    spectra = np.zeros(arrsize)

    # Generate a blackbody spectrum if spectype > 100
    if spectype > 100:
        T = spectype  # blackbody temperature is given by the spectral type
        # wavelength in cm; array starts from 1 to avoid division by zero
        wavelength = np.arange(1, arrsize) * 1E-8
        spectra[1:] = (2.0*h*c**2/wavelength**5)\
            / (np.exp(np.longfloat(h*c/(wavelength*k*T))) - 1.0)  # Planck's law

    # Generate a power-law spectrum if spectype is smaller than or equal to 100
    if spectype <= 100:
        # start from 1 to avoid problems when dividing by zero
        powerbase = np.arange(1, arrsize)
        spectra[1:] = powerbase ** spectype

    # normalize
    spectra = spectra/np.max(spectra[startrange:endrange])
    fluxspec = np.sum(spectra[startrange:endrange])/(endrange-startrange)
    spectra = spectra/fluxspec

    # normalised convolution of spectra and filter
    fluxV = np.sum(vf[startrange:endrange] * spectra[startrange:endrange]) / (endrange-startrange)

    # Normalize to specified mV - result should be in erg/s/cm^2/Angstrom
    # fluxfilt is flux from the filter (default is the V filter)
    spectra = spectra * (fluxV/fluxfilt) * 3.63E-9 * (10.0 ** (-mV/2.5))

    # debug below: check the spectra by plotting it
    # wavelengths = np.arange(10000) * 1E-8
    # plt.figure()
    # plt.plot(wavelengths*1E8, spectra)
    # plt.xlabel("Wavelength (cm)")
    # plt.ylabel("Spectral radiance")
    # plt.legend()
    # plt.show()
    # print "mAB at 6000 Angstroms = ", -2.5*np.log10(spectra[6000]) - 21.1

    return spectra
def step(self, policy, obs):
    obs_in = np.copy(obs.astype(np.float)).squeeze()
    #print('obs_in:' + str(obs_in))
    policy_in = np.copy(policy).squeeze()

    # format weights
    self._weights = []
    index = 0
    for i in range(len(self._layer_sizes) - 1):
        ind_weights = np.arange(
            index, index + self._layer_sizes[i] * self._layer_sizes[i + 1])
        index = index + (self._layer_sizes[i]) * self._layer_sizes[i + 1]
        self._weights.append(
            torch.from_numpy(policy_in[ind_weights].reshape(
                [self._layer_sizes[i],
                 self._layer_sizes[i + 1]])).type(self._dtype))

    # take only a subset of observation as input to the NN
    if self._subset_obs is not None:
        obs_in = obs_in[self._subset_obs]

    # normalize observations (zscore)
    if self._norm_values is not None:
        obs_in = (obs_in - self._norm_values[0, :]) / self._norm_values[1, :]
    # or scale values to [-1,1]^N if scale is not None
    elif self._scale is not None:
        #print(obs_in)
        #print(self._min)
        obs_in = ((obs_in - self._min) * 2 * np.ones([6]) / self._range) - np.ones([6])

    x = torch.from_numpy(obs_in.reshape(1, -1)).type(self._dtype)
    y = x.mm(self._weights[0])
    #print('y:' + str(y))
    #print('weight:' + str(self._weights))
    for i in range(len(self._layer_sizes) - 2):
        if self._activation_function == 'relu':
            y = y.clamp(min=0)
        elif self._activation_function == 'tanh':
            y = np.tanh(y)
        elif self._activation_function == 'leakyrelu':
            y[y < 0] = 0.01 * y[y < 0]
        y = y.mm(self._weights[i + 1])
    y = y.numpy()
    y = np.tanh(np.longfloat(self._controller_tmp * y))
    self._action = y[0, :].astype(np.float64)
    if self._action.size == 1:
        self._action = np.array([self._action])
    self._action = np.clip(self._action, -1, 1)  # just in case..
    #print('Len of action in controllers\n')
    #print(len(self._action))
    return self._action
def sentence_logprob(self, sentence):
    """
    COMPLETE THIS METHOD (PART 5)
    Returns the log probability of an entire sequence.
    """
    grams = get_ngrams(sentence, 3)
    p = 1
    for gram in grams:
        p *= np.longfloat(self.smoothed_trigram_probability(gram))
    return np.log2(p)
def LoadData_RandTstDates_NFold(fileName, N):
    TrainDates, ValidationDates, TestDate = Dates(fileName, True, N)
    """print TrainDates
    print "val "
    print ValidationDates
    print "TEST "
    print TestDate
    sys.exit()"""
    inputData = [[] for _ in range(len(TrainDates))]
    outputData = [[] for _ in range(len(TrainDates))]
    validationInputData = []
    validationOutputData = []
    testInputData = []
    testOutputData = []
    testDataForCSV = []
    with open(fileName, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        header = reader.next()
        #for row in itertools.islice(reader, 8883, 8897):
        for row in reader:
            for group in TrainDates:
                if (datetime.datetime.strptime(row[0], '%Y%m%d').date() in group):
                    output = np.longfloat(row[6])
                    input = [
                        np.longfloat(x)
                        for x in itertools.islice(row, 7, len(row)) if x != ''
                    ]  #len(row)
                    index = TrainDates.index(group)
                    inputData[index].append(input)
                    outputData[index].append(output)
                    break
            if (datetime.datetime.strptime(row[0], '%Y%m%d').date() in ValidationDates):
                output = np.longfloat(row[6])
                input = [
                    np.longfloat(x)
                    for x in itertools.islice(row, 7, len(row)) if x != ''
                ]  #len(row)
                validationInputData.append(input)
                validationOutputData.append(output)
            if (datetime.datetime.strptime(row[0], '%Y%m%d').date() == TestDate):
                output = np.longfloat(row[6])
                input = [
                    np.longfloat(x)
                    for x in itertools.islice(row, 7, len(row)) if x != ''
                ]  #len(row)
                testInputData.append(input)
                testOutputData.append(output)
                outRow = [
                    str(x) for x in itertools.islice(row, 0, 7) if x != ''
                ]
                testDataForCSV.append(outRow)
    return inputData, outputData, validationInputData, validationOutputData, testInputData, testOutputData, testDataForCSV
def size_order(obs: OrderBookSeries, order: Order, min_amount_base) -> Order:
    """Size an order that is about to be placed.

    Called with an order that already has a price and a direction; returns
    the same order with an amount set.
    """
    # Respect the maximum amount of funds (base currency) in an order
    Q_max = np.longfloat(str(CONFIG['max_funds_in_order'] / order.price))
    # Size the order as the mean size of the two top orders over the last 15 s
    last15s = np.argwhere(obs.t > obs.t[-1] - np.timedelta64(15, 's')).flatten()
    side = 'Qask' if order.buy_or_sell == 'BUY' else 'Qbid'
    Q = obs.data[side][last15s]
    amt = min(Q[:, :2].mean(), Q_max)
    # Make sure the order size is not lower than the market's minimum amount
    amt = max(amt, min_amount_base)
    return order.replace(amount=amt)
def biseccion(funcion, a, b, error, valor_raiz=None, max_iteraciones=10000):
    tabla = [[
        'k', 'a', 'b', 'F(a)', 'F(b)', 'p', 'err_rel', 'cota_err',
        'err_absoluto'
    ]]
    cant_iteraciones = calcular_cant_iteraciones(a, b, error)
    ai = np.longfloat(a)
    bi = np.longfloat(b)
    Fai = funcion(ai)
    Fbi = funcion(bi)
    err = '-'
    err_rel = '-'
    for i in range(cant_iteraciones + 1):
        if tabla[-1][3] == 0:
            break
        p = (ai + bi) / 2
        tabla.append([i, ai, bi, Fai, Fbi, p, err_rel, abs(ai - bi), err])
        Fp = funcion(p)
        if valor_raiz:
            err = abs(p - valor_raiz)
            err_rel = err / valor_raiz
        if (Fp * Fai < 0):
            bi = p
            Fbi = Fp
        else:
            ai = p
            Fai = Fp
    return tabla
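# calcular_cant_iteraciones is not shown above; a plausible sketch, assuming
# it derives the iteration count from the standard bisection error bound
# |b - a| / 2**n <= error (this is a guess at the helper, not its actual
# definition):
import numpy as np

def calcular_cant_iteraciones(a, b, error):
    return int(np.ceil(np.log2(abs(b - a) / error)))

# e.g. for [1, 2] with error 1e-6 this gives 20 iterations.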
def __utt_mean_overlap(
        utt_row: pd.Series,
        prev_coref_utts: Mapping[int, pd.DataFrame]) -> float:
    coref_seq_ordinality = utt_row[
        TokenTypeOverlapColumn.COREF_SEQ_ORDER.value]
    if coref_seq_ordinality < 2:
        result = np.longfloat(0.0)
    else:
        prev_utts = prev_coref_utts[coref_seq_ordinality]
        token_types = utt_row[TokenTypeOverlapColumn.TOKEN_TYPES.value]
        result = prev_utts.apply(
            lambda prev_utt: alignment_metrics.token_type_overlap_ratio(
                token_types,
                prev_utt[TokenTypeOverlapColumn.TOKEN_TYPES.value]),
            axis=1).mean()
    return result
class GeneralConvergenceTokenTypeOverlapCalculator(object):
    __NULL_ARRAY = np.full(1, np.longfloat(0.0))

    @classmethod
    def __utt_overlaps(cls, utt_row: Mapping[str, Any],
                       prev_coref_utts: Mapping[int, pd.DataFrame]) -> Tuple[int, np.array]:
        coref_seq_ordinality = utt_row[TokenTypeOverlapColumn.COREF_SEQ_ORDER.value]
        if coref_seq_ordinality < 2:
            overlaps = cls.__NULL_ARRAY
        else:
            prev_utts = prev_coref_utts[coref_seq_ordinality]
            token_types = utt_row[TokenTypeOverlapColumn.TOKEN_TYPES.value]
            overlaps = prev_utts.apply(
                lambda prev_utt: alignment_metrics.token_type_overlap_ratio(
                    token_types, prev_utt[TokenTypeOverlapColumn.TOKEN_TYPES.value]),
                axis=1).values
        return coref_seq_ordinality, overlaps

    def __init__(self, coreference_feature_col_name: str):
        self.coreference_feature_col_name = coreference_feature_col_name

    # noinspection PyTypeChecker,PyUnresolvedReferences
    def __call__(self, df: pd.DataFrame) -> Dict[int, List[float]]:
        scored_df = df.copy(deep=False)
        # Ensure that rows are sorted by the round they belong to and by their
        # chronological ordering within each round
        scored_df.sort_values(
            by=[sd.EventDataColumn.ROUND_ID.value,
                utterances.UtteranceTabularDataColumn.START_TIME.value,
                utterances.UtteranceTabularDataColumn.END_TIME.value],
            inplace=True)
        # Calculate token type overlap for each chain of reference for each
        # entity/coreference feature and each speaker in each session
        session_ref_utts = scored_df.groupby(
            (write_target_ref_utts.DYAD_COL_NAME, self.coreference_feature_col_name),
            as_index=False, sort=False)
        scored_df[TokenTypeOverlapColumn.COREF_SEQ_ORDER.value] = session_ref_utts.cumcount() + 1
        coref_seq_ordinalities = scored_df[TokenTypeOverlapColumn.COREF_SEQ_ORDER.value].unique()
        prev_coref_utts = dict(
            (coref_seq_ordinality,
             scored_df.loc[scored_df[TokenTypeOverlapColumn.COREF_SEQ_ORDER.value] ==
                           coref_seq_ordinality - 1])
            for coref_seq_ordinality in coref_seq_ordinalities)
        result = dict((coref_seq_ordinality, [])
                      for coref_seq_ordinality in coref_seq_ordinalities)
        for row in scored_df.itertuples():
            # noinspection PyProtectedMember
            coref_seq_ordinality, overlaps = self.__utt_overlaps(row._asdict(), prev_coref_utts)
            result[coref_seq_ordinality].extend(overlaps)
        return result
def LoadData_RandTstDates(fileName):
    TrainDates, ValidationDates, TestDate = Dates(fileName)
    inputData = []
    outputData = []
    validationInputData = []
    validationOutputData = []
    testInputData = []
    testOutputData = []
    testDataForCSV = []
    with open(fileName, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        header = reader.next()
        i = 2
        for row in reader:
            #for row in itertools.islice(reader, 1, 2):
            if (datetime.datetime.strptime(row[0], '%Y%m%d').date() in TrainDates):
                output = np.longfloat(row[6])
                input = [
                    np.longfloat(x)
                    for x in itertools.islice(row, 7, len(row)) if x != ''
                ]  #len(row)
                inputData.append(input)
                outputData.append(output)
            if (datetime.datetime.strptime(row[0], '%Y%m%d').date() in ValidationDates):
                output = np.longfloat(row[6])
                input = [
                    np.longfloat(x)
                    for x in itertools.islice(row, 7, len(row)) if x != ''
                ]  #len(row)
                validationInputData.append(input)
                validationOutputData.append(output)
            if (datetime.datetime.strptime(row[0], '%Y%m%d').date() == TestDate):
                output = np.longfloat(row[6])
                input = [
                    np.longfloat(x)
                    for x in itertools.islice(row, 7, len(row)) if x != ''
                ]  #len(row)
                testInputData.append(input)
                testOutputData.append(output)
                outRow = [
                    str(x) for x in itertools.islice(row, 0, 7) if x != ''
                ]
                testDataForCSV.append(outRow)
    return inputData, outputData, validationInputData, validationOutputData, testInputData, testOutputData, testDataForCSV
def adaboost_train_ds(data_arr, class_labels, num_it=40):
    """Train AdaBoost using decision stumps (single-level decision trees).

    :param data_arr: data set
    :param class_labels: class labels
    :param num_it: number of iterations
    :return:
    """
    weak_class_arr = []
    m = np.shape(data_arr)[0]
    # Initialize a weight for every sample
    d = np.mat(np.ones((m, 1)) / m)
    # Accumulated class estimate for every data point
    agg_class_est = np.mat(np.zeros((m, 1)))
    for i in range(num_it):
        # Build a decision stump
        best_stump, error, class_est = build_stump(data_arr, class_labels, d)
        # print("D:", d.T)
        # Compute the weight (alpha) of this stump;
        # max(error, 1e-16) avoids a division by zero when there is no error
        alpha = float(0.5 * np.log(
            (1 - error) / np.longfloat(max(error, 1e-16))))
        # Store alpha together with the stump
        best_stump['alpha'] = alpha
        weak_class_arr.append(best_stump)
        # print("class_est:", class_est.T)
        # Compute the new weight vector d
        expon = np.multiply(-1 * alpha * np.mat(class_labels).T, class_est)
        d = np.multiply(d, np.exp(expon))
        d = d / d.sum()
        # Accumulate the class estimates
        agg_class_est += alpha * class_est
        # print('agg_class_est', agg_class_est.T)
        # Compute the training error rate
        agg_errors = np.multiply(
            np.sign(agg_class_est) != np.mat(class_labels).T, np.ones((m, 1)))
        # print(agg_errors)
        error_rate = agg_errors.sum() / m
        # print('total error', error_rate, '\n')
        if error_rate == 0.0:
            break
    return weak_class_arr, agg_class_est
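# A quick worked value for the alpha formula above, with an illustrative
# weighted error of 0.2: alpha = 0.5 * ln(0.8 / 0.2) is roughly 0.693.
import numpy as np

error = 0.2
alpha = float(0.5 * np.log((1 - error) / np.longfloat(max(error, 1e-16))))
print(alpha)  # ~0.6931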
def test_literal():
    cases = {
        " 42": 42,
        "+42LLu": 42,
        "-0": 0,
        "0o52": 42,
        "-052": -42,
        "0B101010": 42,
        "-0x2A": -42,
        "3.141592653589793": 3.141592653589793,
        "-3.141592653589793": -3.141592653589793,
        "3.141592653589793f": np.float32("3.1415927"),
        "-314.1592653589793e-2f": np.float32("-3.1415927"),
        ".31415926535897932385e1l": np.longfloat("3.1415926535897932385"),
        "true": True,
        "false": False,
        r'"\tk\012" ': "\tk\012",
        r"'c'": "c",
    }
    for s, x in cases.items():
        yield check_literal, s, x
def forward(self, weighted_input):
    return np.longfloat(1.0 / (1.0 + np.exp(-weighted_input)))
def printPopulationsAtDepth(self, depth=PHYLUM, outFile=None, ignoreAcc=True,
                            dataset=None, normaliseToBase=True,
                            useRankData=True):
    """Print number assigned, unique and Chao estimate for each taxon in
    rank, tab-separated text format
    """
    if not type(depth) is IntType:
        # Find depth
        for i in self.depths:
            if self.depths[i] == depth:
                depth = i
                break

    if not outFile:
        print "Parent taxa\tTaxa\tAbundance\tShare\tUnique\tChao"
    else:
        outFile.write("Parent taxa\tTaxa\tAbundance"
                      "\tShare\tUnique\tChao\n")

    total = 0
    totalU = 0
    if useRankData:
        nodes = self.getChildrenByRank(depth)
    else:
        nodes = self.root.getChildrenByDepth(depth=depth, ignoreAcc=ignoreAcc)
        nodes = self._fixRanks(nodes, depth)

    if not self.root.getAssignment(dataset):
        sys.stderr.write("No assignments in %s\n" % dataset)
        return

    if normaliseToBase and depth > self.META:
        rootNode = self.getNode("Cellular organisms")
    else:
        rootNode = self.root
    rpop = rootNode.getAssignment(dataset).population

    for node in nodes:
        if "Unknown" in node.name:
            pass
        else:
            a = node.getAssignment(dataset)
            if a:
                npop = a.population
                nu = a.numberAssigned()
                if not "Unknown" in node.name:
                    total += npop
                    totalU += nu
            else:
                npop = 0
                nu = 0
            percent = longfloat(npop) / longfloat(rpop)
            if not outFile:
                print("%s\t%s\t%s\t%s\t%s\t%s" %
                      (node.parentPrintName, node.name, npop, percent, nu,
                       node.chaoEstimate(dataset)))
            else:
                outFile.write("%s\t%s\t%s\t%s\t%s\t%s\n" %
                              (node.parentPrintName, node.name, npop, percent,
                               nu, node.chaoEstimate(dataset)))

    percent = longfloat(total) / longfloat(rpop)
    if not outFile:
        print "\tTotal\t%s\t%s\t%s" % (total, percent, totalU)
    else:
        outFile.write("\tTotal\t%s\t%s\t%s\n" % (total, percent, totalU))

    ut = rpop - total
    percent = longfloat(ut) / longfloat(rpop)
    utU = rootNode.getAssignment(dataset).numberAssigned() - totalU
    if not outFile:
        print("\tUnclassified at %s level\t%s\t%s\t%s" %
              (self.depths[depth], ut, percent, utU))
    else:
        outFile.write("\tUnclassified at %s level\t%s\t%s\t%s\n" %
                      (self.depths[depth], ut, percent, utU))
s = a.shape[0]
b = np.zeros((s, 30 - s))
c = np.zeros((30 - s, s))
d = np.identity(30 - s)
picks = np.bmat([[a, b], [c, d]])

# Build transition matrix
t = np.zeros((32, 32))
for i in range(0, 30):
    # Seed i+1 gets pick 1
    p0 = np.longfloat(picks[i, 0])
    # Seed i+1 gets pick 5
    p4 = np.longfloat(picks[i, 4])
    #print(p0, p4)
    j = max(i - 2, 0)
    k = min(i + 3, 29)
    t[j, i] = np.longfloat(0.40 * (1.0 - (p0 + p4)))
    t[k, i] = np.longfloat(0.60 * (1.0 - (p0 + p4)))
    # Absorbing states
    t[30, i] = np.longfloat(p0)