import numpy as np

def sample(preds, temperature=1.0):
    # Sample an index from a probability array, reweighted by temperature.
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)
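A minimal usage sketch for sample(), assuming the input is a model's softmax output; the probability vector and temperature below are illustrative, not from the source.

example_preds = np.array([0.1, 0.2, 0.4, 0.3])        # illustrative softmax output
next_index = sample(example_preds, temperature=0.8)   # lower temperature -> greedier
print(next_index)                                     # an int in [0, 3]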
def d2v_infer_vecs(model, docs):
    from gensim.models.doc2vec import Doc2Vec
    import numpy as np

    docs = spacy_tokenize(docs)
    docvecs = np.asarray([model.infer_vector(docs[i]) for i in range(len(docs))])
    return docvecs
def validation_loss(self, overwrite_loss_func=False, return_val_data=False):
    self.model.eval()
    loss_func = overwrite_loss_func if overwrite_loss_func else self.loss_func
    if self.max_validation_steps == -1:
        self.max_validation_steps = len(self.data_val)
    val_data = []  # collected only when return_val_data is True
    val_loss = []
    with torch.no_grad():
        count = tqdm(
            range(self.max_validation_steps),
            desc="Validation",
            disable=not self.verbose,
            leave=True,
        )
        for data, i in zip(self.data_val, count):
            data = data["data"].to(self.device)
            pred = self.model(data)
            val_loss.append(loss_func(pred, data).cpu().numpy())
            if return_val_data:
                val_data.append(data.cpu().numpy())
    if return_val_data:
        return np.asarray(val_loss), np.asarray(val_data)
    return np.asarray(val_loss)
def __getitem__(self, scan_index: int) -> Tuple[np.ndarray, Tuple[int, int, int]]:
    '''
    returns:
        - scan
        - scan shape
    '''
    with h5py.File(self.scans[scan_index], mode='r') as f:
        data = f[self.data_key]
        return np.asarray(data), data.shape
def sort_hashes(query_hash, hashes):
    """
    Given a dict of hogs:hashes, returns a sorted array of hogs and jaccard
    distances relative to the query hog.

    :param query_hash: weighted minhash of the query
    :param hashes: a dict of hogs:hashes
    :return: sortedhogs, jaccard
    """
    # Sort the hashes by their jaccard score relative to the query hash.
    jaccard = np.asarray([query_hash.jaccard(hashes[hog]) for hog in hashes])
    index = np.argsort(jaccard)
    sortedhogs = np.asarray(list(hashes.keys()))[index]
    jaccard = jaccard[index]
    return sortedhogs, jaccard
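A self-contained sketch of calling sort_hashes(); the _FakeHash class is a hypothetical stand-in for a weighted minhash object exposing a jaccard() method and is not part of the source.

class _FakeHash:
    # Hypothetical stand-in: real code would pass weighted minhash objects.
    def __init__(self, value):
        self.value = value

    def jaccard(self, other):
        return 1.0 - abs(self.value - other.value)

query = _FakeHash(0.5)
hog_hashes = {"hog1": _FakeHash(0.1), "hog2": _FakeHash(0.6), "hog3": _FakeHash(0.4)}
sortedhogs, jaccard = sort_hashes(query, hog_hashes)
print(sortedhogs, jaccard)  # hogs ordered by ascending jaccard score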
def load_next_image(self):
    '''
    Load the next image in a batch.
    '''
    if self._cur == len(self.indexlist):
        self._cur = 0
    index = self.indexlist[self._cur]
    # Choose the image type (e.g., .png, .jpg, .JPEG).
    im_file_name = index + '.JPEG'
    im = np.asarray(Image.open(im_file_name))
def create_wrong(file_path):
    folder = np.random.choice(glob.glob(file_path + "*"))
    while folder == "datalab":
        folder = np.random.choice(glob.glob(file_path + "*"))
    mat = np.zeros((480, 640), dtype='float32')
    i = 0
    j = 0
    depth_file = np.random.choice(glob.glob(folder + "/*.dat"))
    with open(depth_file) as file:
        for line in file:
            vals = line.split('\t')
            for val in vals:
                if val == "\n":
                    continue
                if int(val) > 1200 or int(val) == -1:
                    val = 1200
                mat[i][j] = float(int(val))
                j += 1
                j = j % 640
            i += 1
    mat = np.asarray(mat)
    mat_small = mat[140:340, 220:420]
    mat_small = (mat_small - np.mean(mat_small)) / np.max(mat_small)
    plt.imshow(mat_small)
    plt.show()

    folder2 = np.random.choice(glob.glob(file_path + "*"))
    while folder == folder2 or folder2 == "datalab":
        # Re-draw if the same folder (or the excluded one) was chosen.
        folder2 = np.random.choice(glob.glob(file_path + "*"))
    mat2 = np.zeros((480, 640), dtype='float32')
    i = 0
    j = 0
    depth_file = np.random.choice(glob.glob(folder2 + "/*.dat"))
    with open(depth_file) as file:
        for line in file:
            vals = line.split('\t')
            for val in vals:
                if val == "\n":
                    continue
                if int(val) > 1200 or int(val) == -1:
                    val = 1200
                mat2[i][j] = float(int(val))
                j += 1
                j = j % 640
            i += 1
    mat2 = np.asarray(mat2)
    mat2_small = mat2[140:340, 220:420]
    mat2_small = (mat2_small - np.mean(mat2_small)) / np.max(mat2_small)
    plt.imshow(mat2_small)
    plt.show()
    return np.array([mat_small, mat2_small])
def bond_label_fraction(top_n=2):
    """
    Get the fraction of each type of label 0 (low energy), 1 (high energy), and 2
    (unknown) in the dataset.

    Note, this requires that when extracting the reactions, all the reactions related
    to a reactant (ignoring symmetry) should be extracted.

    Note, here we analyze the 0,0,0 type reactions.
    """
    # filename = "~/Applications/db_access/mol_builder/reactions.pkl"
    # filename = "~/Applications/db_access/mol_builder/reactions_n200.pkl"
    # filename = "~/Applications/db_access/mol_builder/reactions_qc.pkl"
    # filename = "~/Applications/db_access/mol_builder/reactions_qc_ws.pkl"
    filename = "~/Applications/db_access/mol_builder/reactions_qc_ws_charge0.pkl"
    extractor = ReactionCollection.from_file(filename)
    groups = extractor.group_by_reactant_charge_0()

    num_bonds = []
    frac = defaultdict(list)
    for ropb in groups:
        counts = np.asarray([0, 0, 0])
        all_none = True
        for i, rxn in enumerate(ropb.order_reactions()):
            energy = rxn.get_free_energy()
            if energy is None:
                counts[2] += 1
            else:
                all_none = False
                if i < top_n:
                    counts[1] += 1
                else:
                    counts[0] += 1
        if all_none:
            print("reactant {} {} has no broken-bond reaction; should never happen"
                  .format(ropb.reactant.id, ropb.reactant.formula))
            continue
        n = len(ropb.order_reactions())
        num_bonds.append(n)
        for k, c in enumerate(counts):
            frac[k].append(c / n)

    print("### number of bonds in dataset (mean):", np.mean(num_bonds))
    print("### number of bonds in dataset (median):", np.median(num_bonds))
    for i, fr in frac.items():
        print(f"### label{i} bond ratio in dataset (mean): {np.mean(fr)}")
        print(f"### label{i} bond ratio in dataset (median): {np.median(fr)}")
def maloshell_cooling(A1, A2, Ea_1, Ea_2, c0, tau=132.5, T_room=21.1):
    """
    Approximates the isomerization rate during cooling, and assumes no significant
    isomerization after one e-fold (tau minutes) has passed. The default tau is taken
    from cooling one gallon of water in an 8 quart stainless steel stock pot with a
    steel lid on.

    Args:
        A1 (float): Exponential prefactor for k1.
        A2 (float): Exponential prefactor for k2.
        Ea_1 (float): Activation energy for reaction 1.
        Ea_2 (float): Activation energy for reaction 2.
        c0 (np.ndarray): Initial condition vector.
        tau (float): Cooling rate time scale in minutes. Default is 132.5 min.
        T_room (float): Room temperature the water is cooling in. Default is 21.1 C (70 F).

    Returns:
        function: Utilization function vector for c1, c2, c3
    """
    # Make sure c0 is an array:
    c0 = np.asarray(c0)
    # Create time array with spacings of one minute.
    t_arr = np.linspace(0., tau, int(np.ceil(tau)) + 1)
    # Grab the Newton cooling function T(t) with T0=T_room and Ti=100 C.
    temp_func = NewtonCooling.T(T_room, 100., tau)
    # Get rate functions:
    k1_func = lambda t: reaction.arrhenius(Ea_1, A1)(temp_func(t))
    k2_func = lambda t: reaction.arrhenius(Ea_2, A2)(temp_func(t))

    def dcdt(c, t):
        # Internal function to solve the ODE; c is the vector [c1, c2, c3].
        M = np.zeros((3, 3))
        M[0, 0] = -reaction.RateEquations.dkn_dt(1, k1_func(t))(t)
        M[1, 0] = reaction.RateEquations.dkn_dt(1, k1_func(t))(t)
        M[1, 1] = -reaction.RateEquations.dkn_dt(1, k2_func(t))(t)
        M[2, 1] = reaction.RateEquations.dkn_dt(1, k2_func(t))(t)
        # print(M)
        return np.dot(M, c)

    c = odeint(dcdt, c0, t_arr)
    # Extrapolation fill values for each component of vector c.
    ext = [(c[0, i], c[-1, i]) for i in range(3)]
    # Linear interpolation for each vector component. Assumes no utilization below t=0
    # and fixed utilization above the largest t value given as input.
    return np.asarray(
        [interp1d(t_arr, c[:, i], bounds_error=False, fill_value=ext[i])
         for i in range(3)])
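A hedged usage sketch for maloshell_cooling(); it assumes the NewtonCooling and reaction helpers referenced above are importable, so it is shown as comments with illustrative parameter values rather than executable code.

# util_funcs = maloshell_cooling(A1=1.0e11, A2=1.0e12, Ea_1=9.0e4, Ea_2=1.0e5,
#                                c0=[1.0, 0.0, 0.0])          # illustrative values
# c1_at_30min = util_funcs[0](30.0)   # interpolated c1 component 30 minutes in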
def getLayer(self, layer):
    """
    Gets the given layer from the image

    layer:- a string, one of 'rgb', 'l' or 'grad', selecting which layer to retrieve
    """
    if layer == 'rgb':
        return self.rgbImage
    elif layer == 'l':
        return self.lumoImage
    elif layer == 'grad':
        return self.lumoGradient
    else:
        return np.asarray([[0]], dtype=np.int32)
def getAfPointTile(self, afPointIndex, layer):
    """
    Returns the tile corresponding to the primary AF point for the image, from the
    selected layer

    afPointIndex:- the index of the AF point to retrieve the tile for
    layer:- a string, one of 'rgb', 'l' or 'grad', selecting which layer the tile comes from
    returns:- a numpy array with the requested tile
    """
    top, bottom, left, right = determineAfPointBox(afPointIndex)
    if layer == 'rgb':
        return self.rgbImage[top:bottom, left:right, :]
    elif layer == 'l':
        return self.lumoImage[top:bottom, left:right]
    elif layer == 'grad':
        return self.lumoGradient[top:bottom, left:right]
    else:
        return np.asarray([[0]], dtype=np.int32)
def create_couple_rgbd(file_path):
    folder = np.random.choice(glob.glob(file_path + "*"))
    while folder == "datalab":
        folder = np.random.choice(glob.glob(file_path + "*"))
    print(folder)
    mat = np.zeros((480, 640), dtype='float32')
    i = 0
    j = 0
    depth_file = np.random.choice(glob.glob(folder + "/*.dat"))
    with open(depth_file) as file:
        for line in file:
            vals = line.split('\t')
            for val in vals:
                if val == "\n":
                    continue
                if int(val) > 1200 or int(val) == -1:
                    val = 1200
                mat[i][j] = float(int(val))
                j += 1
                j = j % 640
            i += 1
    mat = np.asarray(mat)
    mat_small = mat[140:340, 220:420]

    img = Image.open(depth_file[:-5] + "c.bmp")
    img.thumbnail((640, 480))
    img = np.asarray(img)
    img = img[140:340, 220:420]
    mat_small = (mat_small - np.mean(mat_small)) / np.max(mat_small)
    plt.imshow(mat_small)
    plt.show()
    plt.imshow(img)
    plt.show()

    mat2 = np.zeros((480, 640), dtype='float32')
    i = 0
    j = 0
    depth_file = np.random.choice(glob.glob(folder + "/*.dat"))
    with open(depth_file) as file:
        for line in file:
            vals = line.split('\t')
            for val in vals:
                if val == "\n":
                    continue
                if int(val) > 1200 or int(val) == -1:
                    val = 1200
                mat2[i][j] = float(int(val))
                j += 1
                j = j % 640
            i += 1
    mat2 = np.asarray(mat2)
    mat2_small = mat2[140:340, 220:420]

    img2 = Image.open(depth_file[:-5] + "c.bmp")
    img2.thumbnail((640, 480))
    img2 = np.asarray(img2)
    img2 = img2[160:360, 240:440]
    plt.imshow(img2)
    plt.show()
    mat2_small = (mat2_small - np.mean(mat2_small)) / np.max(mat2_small)
    plt.imshow(mat2_small)
    plt.show()

    # Stack the RGB channels and the normalized depth into 200x200x4 arrays.
    full1 = np.zeros((200, 200, 4))
    full1[:, :, :3] = img[:, :, :3]
    full1[:, :, 3] = mat_small
    full2 = np.zeros((200, 200, 4))
    full2[:, :, :3] = img2[:, :, :3]
    full2[:, :, 3] = mat2_small
    return np.array([full1, full2])
# param_list = param_list[:11]

# labels
lbl_enc = preprocessing.LabelEncoder()
labels = lbl_enc.fit_transform(labels)

col_list = ['Class_%d' % i for i in xrange(1, 10)]
df = pd.DataFrame(index=train_ids)
i = 0
preds_train = []
for loss, ntree, param in param_list:
    part_train = pd.read_csv("train_" + get_train_from_param(param, ntree), index_col=0)
    # part_train.drop(['id'], axis=1)
    # for col in col_list:
    #     df[col + '_%d' % i] = part_train[col]
    preds_train.append(np.asarray(part_train))
    i += 1

cv_search_param = False
if cv_search_param:
    for j in xrange(100):
        pass
else:
    clf = linear_model.LogisticRegression('l1', C=0.1)
    train = np.asarray(df)
    clf.fit(train, labels)

df = pd.DataFrame(index=sample.id.values)
i = 0
preds_test = []
for loss, ntree, param in param_list:
def imp_solver(para, chain_mpo=None):
    """
    :param para:
    :param chain_mpo: init with this chain mpo
    :return:
    """
    # --------------------------------------------------------
    # initialization
    # --------------------------------------------------------
    el_list = para.el_list
    vl_list = para.vl_list

    # define chain Hamiltonian
    chain_ham = []
    for i in range(0, 4):
        ham = single_chain_ham(el_list[i], vl_list[i], 0.0)
        chain_ham.append(ham)
    int_gate = int_ham(para, para.tau / 2.0)

    if chain_mpo is None:
        # chain mps
        L_bath = len(el_list)
        chain_mpo = []
        for i in range(0, 4):
            if i % 2 == 0:
                if_bath_sign = True
            else:
                if_bath_sign = False
            mps_ = mps(para, L_bath + 1, if_bath_sign)
            chain_mpo.append(mps_)
    else:
        for mpo in chain_mpo:
            mpo.load_gnd()            # load the gnd as init
            mpo.if_find_gnd = False   # set the gnd flag false

    # TEBD solver
    tebd_ = TEBD(para)
    tebd_.init_state(chain_mpo, chain_ham, int_gate, para.tau / 2.0)

    # --------------------------------------------------------
    # imaginary time evolution to find the ground state
    # --------------------------------------------------------
    Ntau = para.Ntau
    max_en_diff = para.max_en_diff
    En = 999999
    i_check = 5  # check every i_check steps
    for i in range(0, Ntau):
        if i % 20 == 0 and i != 0 and para.tau > 0.02:
            para.tau = para.tau / 2.0
        if para.tau < 0.1:
            i_check = 2
        if para.tau < 0.05:
            i_check = 1
        if i % i_check == 0 or i == Ntau - 1:
            En_new = tebd_.time_evolution(para.tau, if_calculate_gnd_en=True)
            diff = np.abs(En - En_new)
            En = En_new
            if diff < max_en_diff:
                if para.tau < 0.02:
                    print("Solved")
                    break
                else:
                    para.tau = para.tau / 2.0
        else:
            tebd_.time_evolution(para.tau, if_calculate_gnd_en=False)
    tebd_.save_gnd()

    # get static quantity
    tebd_.cal_static()
    static_ = tebd_.static

    En_gnd = En
    En_diff = diff
    if diff > max_en_diff:
        print("Not solved")
    print("Energy diff is ", En_diff)
    print("Gnd energy is ", En_gnd)

    # --------------------------------------------------------
    # real time evolution
    # --------------------------------------------------------
    gt = {}
    for imp_ind in para.imp_list:
        tebd_.load_gnd()
        Nt = para.Nt
        t_list = []
        g_plus = []

        # d_t d_dag
        tebd_.act_d_dag(imp_ind)
        for i in range(0, Nt):
            if i % para.i_meas == 0:
                g_val = tebd_.trace_with_d(imp_ind)
                t_list.append(i * para.t)
                g_plus.append(g_val)
            tebd_.time_evolution(-1j * para.t, if_calculate_gnd_en=False)
        t_list = np.asarray(t_list)
        g_plus = np.asarray(g_plus) * np.exp(1j * En_gnd * t_list)

        tebd_.load_gnd()
        Nt = para.Nt
        t_list = []
        g_minus = []

        # d_t_dag d
        tebd_.act_d(imp_ind)
        for i in range(0, Nt):
            if i % para.i_meas == 0:
                g_val = tebd_.trace_with_d_dag(imp_ind)
                t_list.append(i * para.t)
                g_minus.append(g_val)
            tebd_.time_evolution(1j * para.t, if_calculate_gnd_en=False)
        t_list = np.asarray(t_list)
        g_minus = np.asarray(g_minus) * np.exp(-1j * En_gnd * t_list)

        gt[imp_ind] = -1j * (g_plus + g_minus)

    sol = {
        't': t_list,              # t list
        'gt': gt,                 # gt dict
        'static': static_,        # static quantity (filling)
        'mps': tebd_.chain_mpo,   # mpo
        'gnd_en': En_gnd,         # Gnd energy
        'en_diff': En_diff        # energy diff
    }
    return sol
    cfg.NET_G = net_G_name
    algo.sample(dataloader, eval_name='eval', eval_num=args.eval_num)
    fid_score_now = \
        fid_scores(output_dir, cfg, sample_num=args.sample_num,
                   gen_images_path=args.gen_paths, loop=True)
    f.write('%s, %s, %.4f\n' % (date_str, net_G_name, fid_score_now))
    f.close()
    # Save the best FID score model
    with open(os.path.join(output_dir, 'all_FID_eval.txt'), 'r') as f:
        all_lines = f.readlines()
    score_array = \
        np.asarray([float(line.strip('\n').split(', ')[-1])
                    for line in all_lines])
    if fid_score_now == score_array.min():
        print("save the best FID score model, the score is %.4f" %
              fid_score_now)
        net_G_name = all_lines[score_array.argmin()].split(', ')[1]
        shutil.copy(os.path.join(output_dir, 'Model', net_G_name),
                    os.path.join(output_dir, 'model_reserve'))
elif args.inter_eval:
    '''Choose two random variables and do interpolation'''
    algo.sample(dataloader, eval_name='inter_eval', eval_num=args.eval_num)
elif args.LPIPS_eval:
    '''Do LPIPS evaluation'''
    f = open(os.path.join(output_dir, 'all_LPIPS_eval.txt'), 'a')
def fit_dist_extinct(wavelength,
                     observed_sed_nuFnu,
                     model,
                     error_observed_sed_nuFnu=None,
                     Rv=3.1,
                     modeldist=1.0,
                     additional_free=None,
                     logfit=False,
                     distance_range=[0.0, 1000.0],
                     model_noise_frac=0.1,
                     vary_av=True,
                     vary_distance=True,
                     av_range=[0.0, 10.0],
                     rv_range=[2.0, 20.0],
                     vary_rv=False,
                     **kwargs):
    """
    Adapted from the fit_dist_extinct3.pro MCRE file designed to allow distance and
    extinction to vary when computing the SED chi-squared. For a given inclination,
    this function returns the best fit distance, extinction, Rv and chi-squared value.

    Parameters
    -----------
    wavelength : Float Array
        The wavelengths, in microns
    observed_sed_nuFnu : Quantity object in units W/m2
        The nuFnu for the observed SED.
    error_observed_sed_nuFnu : Float array
        Error for the observed SED
    model_noise_frac : Float
        Noise fraction for the model SEDs
    modeldist : Float
        Distance to model disk in pc.
    additional_free : Int
        Number of additional free parameters to include in the weighted chi-squared.
    vary_distance : bool
        Allow the distance to the target to vary? Default is False
    distance_range : 2 element iterable
        Min and max allowable distance, in pc
    vary_av : bool
        Allow optical extinction (A_V) to vary? Default is False
    av_range : 2 element iterable
        Min and max allowable A_V
    vary_rv : bool
        Allow Rv to vary? Default is False
    Rv : float
        Reddening law color parameter for extinction. Default is 3.1
    logfit : bool
        Fit the SED on a logarithmic scale? Default is False
    """
    # wave_ang = [x*0.0001 for x in wavelength]  # Convert from microns to angstroms
    wave_ang = wavelength.to(units.Angstrom)

    observed_sed_nuFnu = np.asarray(observed_sed_nuFnu)

    # Scale model to 1 pc
    model_sed_1pc = np.asarray(model * modeldist**2)

    if vary_distance:
        a_distance = np.asarray([10] + distance_range)
    else:
        a_distance = np.asarray([modeldist])

    if vary_av:
        avsteps = (av_range[0] - av_range[1]) / 0.25
        a_av = np.asarray([avsteps + 1] + av_range)
    else:
        a_av = np.asarray([0])

    if vary_rv:
        a_rv = np.asarray([10] + rv_range)
    else:
        a_rv = np.asarray([Rv])

    if error_observed_sed_nuFnu is not None:
        err_obs = np.asarray(error_observed_sed_nuFnu)
    else:
        err_obs = np.asarray(0.1 * observed_sed_nuFnu)

    if logfit:
        ln_observed_sed_nuFnu = observed_sed_nuFnu
        ln_err_obs = err_obs
        subset = observed_sed_nuFnu != 0.0
        ln_observed_sed_nuFnu[subset] = np.log(observed_sed_nuFnu[subset])
        ln_err_obs[subset] = err_obs[subset] / observed_sed_nuFnu[subset]

    # How many degrees of freedom?
    dof = len(observed_sed_nuFnu)

    chisqs = np.zeros((len(a_distance), len(a_av), len(a_rv)))
    for i_r in range(0, len(a_rv)):
        # Use wavelength in Angstroms
        ext = np.asarray(ccm_extinction(a_rv[i_r], wave_ang))
        for i_d in range(0, len(a_distance)):
            for i_a in range(0, len(a_av)):
                extinction = 10.0**((ext * a_av[i_a]) / (-2.5))
                vout = (model_sed_1pc * extinction) / (a_distance[i_d])**2
                if logfit:
                    ln_vout = vout
                    ln_vout[subset] = np.log(vout[subset])
                    chicomb = (ln_vout - ln_observed_sed_nuFnu)**2 / (
                        ln_err_obs**2 + (ln_vout * np.log(model_noise_frac))**2)
                else:
                    chicomb = (vout - observed_sed_nuFnu)**2 / (
                        err_obs**2 + (vout * model_noise_frac)**2)
                # Normalize to the reduced chi-square.
                chisqs[i_d, i_a, i_r] = sum(chicomb) / (dof - additional_free)

    wmin = np.unravel_index(np.argmin(chisqs), chisqs.shape)
    sed_chisqr = chisqs.min()
    best_distance = a_distance[wmin[0]]
    best_av = a_av[wmin[1]]
    best_rv = a_rv[wmin[2]]

    return best_distance, best_av, best_rv, sed_chisqr
x_list = []
y_list = []

# Read the image
image_path = ''
img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
_, imgt = cv2.threshold(img, 140, 255, cv2.THRESH_BINARY_INV)
x_data = imgt / 255
y_data = ''
x_list.append(x_data)
y_list.append(y_data)

x_train, x_test, y_train, y_test = train_test_split(np.asarray(x_list),
                                                    np.asarray(y_list),
                                                    test_size=test_ratio,
                                                    random_state=42)

# Build the network
model = Sequential()
model.add(
    Conv2D(16,
           kernel_size=(3, 3),
           activation='relu',
           input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(code_classes, activation='softmax'))
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
def get_coefs(word, *arr):
    return word, np.asarray(arr, dtype='float32')

embeddings_index = dict(get_coefs(*o.split(" ")) for o in open(EMBEDDING_FILE))
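A small sketch of what get_coefs() returns for a single GloVe-style embedding line; the sample line below is illustrative.

word, vec = get_coefs(*"the 0.1 -0.2 0.3".split(" "))
print(word, vec.dtype, vec.shape)  # the float32 (3,)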
def get_coefs(word, *arr):
    return word, np.asarray(arr, dtype='float32')

embedding_index = dict(
    get_coefs(*o.split(" "))
    for o in open(EMBEDDING_FILE, encoding='utf8', errors='ignore')
    if len(o) > 100)
def uniq_file_id(self):
    return np.asarray(self.files['file_id'])
def _generate_bayes_net(self):
    # 1. Start at any node (0)
    # 2. At each node figure out the conditional prob
    # 3. Add it to the new graph
    # 4. Find unprocessed adjacent nodes
    # 5. If any, go to 2
    #    Else return the bayes net

    # Will it be possible that zero is not root? If so, we need to pick one.
    root = 0

    samples = np.asarray(self.samples)
    self.bayes_net = nx.bfs_tree(self.spanning_graph, root)

    for parent, child in self.bayes_net.edges():
        parent_array = samples[:, parent]

        # If the node is the root (no predecessors), get the probability of each
        # gene appearing in parent. predecessors() returns an iterator over
        # predecessor nodes of n.
        if not self.bayes_net.predecessors(parent):
            freqs = np.histogram(parent_array, len(np.unique(parent_array)))[0]
            # zip([a,b,c],[1,2,3]) -> [(a, 1), (...)]
            parent_probs = dict(zip(np.unique(parent_array),
                                    freqs / (sum(freqs) * 1.0)))

            self.bayes_net.node[parent]["probabilities"] = {
                x: 0 for x in range(len(self.samples))}
            self.bayes_net.node[parent]["probabilities"].update(parent_probs)

        child_array = samples[:, child]
        unique_parents = np.unique(parent_array)
        for parent_val in unique_parents:
            parent_inds = np.argwhere(parent_array == parent_val)
            sub_child = child_array[parent_inds]
            # Compute the histogram of a set of data.
            freqs = np.histogram(sub_child, len(np.unique(sub_child)))[0]
            child_probs = dict(zip(np.unique(sub_child),
                                   freqs / (sum(freqs) * 1.0)))

            self.bayes_net.node[child][parent_val] = {
                x: 0 for x in range(len(self.samples))}
            self.bayes_net.node[child][parent_val].update(child_probs)

        self.bayes_net.node[child] = dict(probabilities=self.bayes_net.node[child])

# PRIM_MST: compute a minimum spanning tree with Prim's algorithm.
def _generate_spanning_graph(self):
    return nx.prim_mst(self.complete_graph)

def _generate_mutual_information_graph(self):
    samples = np.asarray(self.samples)
    complete_graph = nx.complete_graph(samples.shape[1])

    for edge in complete_graph.edges():
        mutual_info = mutual_info_score(
            samples[:, edge[0]],
            samples[:, edge[1]]
        )
        complete_graph.edge[edge[0]][edge[1]]['weight'] = -mutual_info

    return complete_graph