def train(self, data, labels):
    optimizer = AdamW(self.model.parameters(), correct_bias=False, lr=1e-5)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=self.args.warmup_steps,
        num_training_steps=self.args.num_training_steps)
    self.model.zero_grad()
    for current_epoch in tnrange(self.args.epoch):
        iterations = tnrange(len(labels) // self.args.batch_size)
        batch = self.make_batch(data, labels, self.args.batch_size)
        for _ in iterations:
            batch_data, batch_labels = next(batch)
            self.model.train()
            batch_data, batch_labels = self.preprocess_training_data(
                batch_data, batch_labels)
            if self.args.device == 'gpu':
                batch_data = batch_data.to('cuda')
                batch_labels = batch_labels.to('cuda')
            loss, res = self.model(batch_data, labels=batch_labels)[:2]
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                           self.args.max_grad_norm)
            optimizer.step()
            scheduler.step()
            self.model.zero_grad()
def idft(F):
    f = np.zeros(F.shape, dtype=np.complex128)
    N = F.shape[0]
    M = F.shape[1]
    for h in tnrange(N, desc="Height Loop"):
        for w in tnrange(M, desc="Width Loop"):
            f[h][w] = icalc(F, w, h)
    return f
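# `icalc` is not defined in this snippet; a minimal sketch of what it would
# compute, assuming the standard inverse 2-D DFT with 1/(N*M) normalization:
def icalc(F, w, h):
    """Inverse 2-D DFT evaluated at a single output pixel (h, w)."""
    N, M = F.shape[0], F.shape[1]
    total = 0.0 + 0.0j
    for k in range(N):
        for l in range(M):
            total += F[k][l] * np.exp(2j * np.pi * (k * h / N + l * w / M))
    return total / (N * M)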
def calculate_R(split_halves, step_size):
    """ Calculate R coefficient """
    result = {
        "iterations": [],
        "R": [],
        "within_seq_var": [],
        "between_seq_var": [],
        "var_over_est": []
    }
    counter = 0
    flag = 0
    n = len(split_halves[0])
    tot = int(log(n / step_size, 2)) + 2
    for i in tnrange(tot, desc="Progress"):
        if flag == 1:
            counter += counter  # double the window on every pass after the first
        else:
            counter += step_size
            flag = 1
        counter = min(n, counter)
        s = time.time()
        print("\nCalculating Variance for 1st {} iterations".format(counter))
        my_seq = [sequence[:counter] for sequence in split_halves]
        print(f'Temp. check: seq length is {len(my_seq[0])}')
        W, B = calculate_variance(my_seq)
        result["iterations"].append(counter)
        result["within_seq_var"].append(W)
        result["between_seq_var"].append(B)
        result["var_over_est"].append(((n - 1) / n) * W + (1 / n) * B)
        result["R"].append((result["var_over_est"][-1] / W)**0.5)
        print("Time taken for job={:.1f}s".format(time.time() - s))
    result = pd.DataFrame(result)
    return (result, split_halves)
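# `calculate_variance` is not shown; a minimal sketch under the usual
# Gelman-Rubin definitions (W = mean within-chain variance, B = n times the
# variance of the chain means), which match how W and B are combined above:
def calculate_variance(sequences):
    n = len(sequences[0])
    chain_means = [np.mean(seq) for seq in sequences]
    W = np.mean([np.var(seq, ddof=1) for seq in sequences])
    B = n * np.var(chain_means, ddof=1)
    return W, B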
def beam_search(model, context, length, beam_size, device, temperature=1):
    """ Generate sequence using beam search
        https://machinelearningmastery.com/beam-search-decoder-natural-language-processing/

    Args:
        model: gpt/gpt2 model
        context: tokenized text using gpt/gpt2 tokenizer
        length: length of generated sequence.
        beam_size: >=1 and <= total_no_of_tokens
        device: torch.device object.
        temperature > 0: used to control the randomness of predictions by
            scaling the logits before applying softmax.
    """
    context = torch.tensor(context, dtype=torch.long, device=device)
    context = context.unsqueeze(0)
    with torch.no_grad():
        inputs = {'input_ids': context}
        outputs = model(**inputs)
        next_token_logits = outputs[0][0, -1, :] / temperature
        next_token_probs = F.softmax(next_token_logits, dim=-1)
        scores, indices = torch.topk(next_token_probs, beam_size)
        indices = indices.tolist()
        sequences = [[c] for c in indices]
        for _ in tnrange(length - 1):
            logits = torch.zeros(beam_size * len(next_token_logits))
            for j in range(len(sequences)):
                new_generated = torch.cat(
                    (context,
                     torch.tensor([sequences[j]], dtype=torch.long, device=device)),
                    dim=1)
                inputs = {'input_ids': new_generated}
                outputs = model(**inputs)
                next_token_logits = outputs[0][0, -1, :] / temperature
                next_token_probs = F.softmax(next_token_logits, dim=-1)
                start, stop = j * len(next_token_logits), (j + 1) * len(next_token_logits)
                logits[start:stop] = scores[j] * next_token_probs
            scores, new_logits_indices = torch.topk(logits, beam_size)
            # recover the token id from the flattened (beam, token) index;
            # the modulo base is the vocabulary size
            logits = (new_logits_indices % len(next_token_logits)).tolist()
            for j in range(len(sequences)):
                sequences[j] = sequences[j] + [logits[j]]
    return scores, sequences
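# A hypothetical usage sketch for beam_search, assuming the Hugging Face
# transformers GPT-2 model and tokenizer (the model name 'gpt2' and the
# prompt are illustrative, not from the original):
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2 = GPT2LMHeadModel.from_pretrained('gpt2').eval()
context = tokenizer.encode("The meaning of life is")
scores, sequences = beam_search(gpt2, context, length=20, beam_size=3,
                                device=torch.device('cpu'))
print([tokenizer.decode(seq) for seq in sequences])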
def get_cell_barcode_counts(bamfile, savefile):
    """
    Generates a dictionary of counts for each barcode observed in the XC tag
    :param bamfile: bamfile output from dropseqtools (star_gene_exon_tagged)
    :param savefile: full path and name of file to save results in pickle format
    :return: Dictionary of counts for each barcode detected
    """
    if os.path.exists(savefile):
        cell_barcode_counts = load_pickle(savefile)
    else:
        final_bam = pysam.AlignmentFile(bamfile, "rb")
        total_reads = int(final_bam.mapped)
        cell_barcode_counts = Counter()
        progress = tnrange(total_reads)
        for read in final_bam.fetch():
            barcode = dict(read.tags)['XC']
            cell_barcode_counts[barcode] += 1
            progress.update(1)
        save_dict_as_pickle(cell_barcode_counts, savefile)
    return cell_barcode_counts
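# `load_pickle` and `save_dict_as_pickle` are not shown; minimal sketches,
# assuming plain pickle serialization:
import pickle

def load_pickle(path):
    with open(path, 'rb') as handle:
        return pickle.load(handle)

def save_dict_as_pickle(d, path):
    with open(path, 'wb') as handle:
        pickle.dump(d, handle)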
def mad_matrix(examples, clf, featuredataset, examplegenerator, attribute_name='car'):
    """
    run examples experiments to see how cars are declared the same or
    different by the clf classifier

    examples(int): number of trials
    clf(classifier): classifier to make same/different distinction
    featuredataset(featureDataset): allows joining of chip to features
    examplegenerator(experimentGenerator): makes experiments for testing
    """
    ddg = defaultdict(int)
    ddb = defaultdict(int)
    for _ in tnrange(examples):
        cameras_test = examplegenerator.generate()
        match_id = get_match_id(cameras_test)
        goods, bads = make_good_bad(cameras_test, match_id)
        good0, good1 = goods[0], goods[1]
        bad0, bad1 = bads[0], bads[1]
        eval_good_bad(good0, good1, clf, featuredataset, ddg, ddb, attribute_name)
        eval_good_bad(bad0, bad1, clf, featuredataset, ddb, ddg, attribute_name)
    return (ddg, ddb)
def test_svm(examples, clf_train, fd_test, eg_test):
    """
    score the trained SVM against test features

    examples(int): number of examples to run
    clf_train(model): model for evaluating testing data
    fd_test(featureDataset): testing dataset
    eg_test(experimentGenerator): generated experiments from testing dataset
    out(int): score from the model
    """
    lessons_test = list()
    outcomes_test = list()
    for _ in tnrange(examples):
        cameras_test = eg_test.generate()
        match_id = get_match_id(cameras_test)
        goods, bads = make_good_bad(cameras_test, match_id)
        make_work(fd_test, lessons_test, outcomes_test, goods, 1)
        make_work(fd_test, lessons_test, outcomes_test, bads, 0)
    print('scoring')
    start = time.time()
    out = clf_train.score(lessons_test, outcomes_test)
    end = time.time()
    print('scoring took {} seconds'.format(end - start))
    return out
def train_svm(examples, fd_train, eg_train):
    """
    train a support vector machine

    examples(int): number of examples to generate
    fd_train(featureDataset): where to join features to chips
    eg_train(experimentGenerator): makes experiments
    clf(SVM): SVM classifier trained on the input examples
    """
    lessons_train = list()
    outcomes_train = list()
    for _ in tnrange(examples):
        cameras_train = eg_train.generate()
        match_id = get_match_id(cameras_train)
        goods, bads = make_good_bad(cameras_train, match_id)
        make_work(fd_train, lessons_train, outcomes_train, goods, 1)
        make_work(fd_train, lessons_train, outcomes_train, bads, 0)
    clf = svm.SVC()
    print('fitting')
    start = time.time()
    clf.fit(lessons_train, outcomes_train)
    end = time.time()
    print('fitting took {} seconds'.format(end - start))
    return clf
def abc(valid_digits, train, prior_sampler, model, metric,
        generator=SklearnDigitMixer, encoder=default_encoder,
        n_iter=100, use_tqdm=True):
    encoded_train = encoder(model, train)
    results = []
    if use_tqdm:
        iterator = tqdm.tnrange(n_iter)
    else:
        iterator = range(n_iter)
    for i in iterator:
        params = prior_sampler(i)
        generated_data = generator(valid_digits, params)(train.shape[0])
        encoded_data = encoder(model, generated_data)
        distance = metric(params, encoded_data, encoded_train)
        results.append((distance, params))
    results.sort()
    return results
def _run(self, X):
    time_ = time.time()
    idx = list(range(self.memsize, len(X)))
    loss = 0
    if chainer.config.train:
        random.shuffle(idx)
    for i in tqdm.tnrange(0, len(idx), self.batchsize, leave=False):
        x = []
        a = []
        for j in range(i, min(i + self.batchsize, len(idx) - 1)):
            x.append(X[idx[j] - self.memsize:idx[j]])
            a.append(X[idx[j]])
        x = self.xp.array(x, 'int32')
        a_hat = self.model(x)
        a = self.xp.array(a, 'int32')
        loss_ = a_hat.shape[0] * F.softmax_cross_entropy(a_hat, a)
        if chainer.config.train:
            self.model.cleargrads()
            loss_.backward()
            self.optimizer.update()
        loss += float(loss_.data)
    loss /= len(idx)
    perplexity = math.exp(loss)
    throughput = len(idx) / (time.time() - time_)
    return loss, perplexity, throughput
def best_subset(X, y):
    """Runs a linear model fit for every possible combination of features"""
    RSS_list, R_squared_list, feature_list = [], [], []
    models, numb_features = [], []

    # Looping over k = 1 to k = 11 features in X
    for k in tnrange(1, len(X.columns) + 1, desc='Loop...'):
        # Looping over all possible combinations: from 11 choose k
        for combo in itertools.combinations(X.columns, k):
            temp_x = X[list(combo)]
            model = fit_lm(temp_x, y)
            rss = get_MSE(model, temp_x, y) * len(y)
            RSS_list.append(rss)  # Append lists
            R_squared_list.append(model.score(temp_x, y))
            models.append(model)
            feature_list.append(combo)
            numb_features.append(len(combo))

    variance = np.var(y)
    # Store in DataFrame
    df = pd.DataFrame({
        'numb_features': numb_features,
        'RSS': RSS_list,
        'R_squared': R_squared_list,
        'features': feature_list,
        'Model': models
    })
    df['BIC'] = df.apply(
        lambda x: BIC(x.RSS, variance, len(y), len(x.features)), axis=1)
    df['Cp'] = df.apply(
        lambda x: mallow_cp(x.RSS, variance, len(y), len(x.features)), axis=1)
    df['adj_r2'] = df.apply(
        lambda x: adjusted_r2(x.RSS, y, len(x.features)), axis=1)
    return df
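# BIC, mallow_cp, and adjusted_r2 are not shown; minimal sketches following
# the usual ISLR definitions, which match the (RSS, variance, n, d) call
# signatures used above:
def BIC(rss, variance, n, d):
    return (rss + np.log(n) * d * variance) / (n * variance)

def mallow_cp(rss, variance, n, d):
    return (rss + 2 * d * variance) / n

def adjusted_r2(rss, y, d):
    n = len(y)
    tss = np.sum((y - np.mean(y))**2)
    return 1 - (rss / (n - d - 1)) / (tss / (n - 1))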
def prune_data(data, target, data_lens, dataset, threshold):
    """
    Remove sentences from data and target if unknown words occur more than
    threshold, or make up more than a third of the input sentence.
    """
    n_iter = target.shape[0]
    prune_list = []
    for i in tnrange(n_iter, desc='Pruning', unit=' lines'):
        data_unk_count = len(np.where(data[i] == dataset.unk_idx)[0])
        target_unk_count = len(np.where(target[i] == dataset.unk_idx)[0])
        if data_unk_count >= threshold or target_unk_count >= threshold:
            prune_list.append(i)
        elif data_lens[i] <= data_unk_count * 3:
            prune_list.append(i)
        else:
            # update word counts
            dataset.unk_count += (data_unk_count + target_unk_count)
            # 0 is assumed to be padding
            dataset.total_tokens += (np.count_nonzero(data[i]) +
                                     np.count_nonzero(target[i]))
    # delete sentences in prune list
    data = np.delete(data, prune_list, 0)
    target = np.delete(target, prune_list, 0)
    new_data_lens = []
    for i in range(len(data_lens)):
        if i not in prune_list:
            new_data_lens.append(data_lens[i])
    assert len(new_data_lens) == data.shape[0]
    return data, target, new_data_lens
def process_data(data_path, max_len, eos, pad):
    """
    Return input, target split, and word count information from data.
    Precondition: file given by data_path has an even number of lines.
    """
    input_data = []
    target_data = []
    input_lens = []
    word_count = Counter()
    with open(data_path, encoding='utf-8') as d_file:
        lines = d_file.read()
    sentences = lines.split('\n')
    n_lines = len(sentences) - 1  # sentences[-1] is ''
    n_iter = n_lines // 2  # process two lines at a time
    for i in tnrange(n_iter, desc='Processing', unit=' lines'):
        input_line = sentences[i * 2][:-1]  # remove '\n' character
        input_line = process_sentence(input_line, max_len, word_count)
        target_line = sentences[i * 2 + 1][:-1]
        target_line = process_sentence(target_line, max_len, word_count)
        if input_line is not None and target_line is not None:
            input_data.append(input_line)
            target_data.append(target_line)
            input_lens.append(len(input_line))
    assert len(input_data) == len(target_data)
    return input_data, target_data, word_count, input_lens
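# `process_sentence` is not defined in this snippet; a hypothetical sketch
# consistent with how it is called above (returns None when a sentence
# cannot be used, otherwise a token list, updating the shared word counter):
def process_sentence(line, max_len, word_count):
    tokens = line.split()
    if len(tokens) == 0 or len(tokens) > max_len:
        return None
    word_count.update(tokens)
    return tokens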
def getfits_x(self, ylist, x0=-1, x1=-1, method='gaussian'):
    """ fit multiple lines """
    if x0 == -1:
        x0 = 0
    if x1 == -1:
        x1 = self._img.shape[1]
    if method == 'gcdf':
        func = self.fit_gcdf
    else:
        func = self.fit_gaussian
    res = np.zeros((len(ylist), 6))
    for i in tnrange(len(ylist)):
        self.update_coords(x0, ylist[i], x1, ylist[i])
        func()
        res[i, :] = [
            i, self._peaks[0],
            self._peaks[0] - self._left_inp[0],
            self._peaks[0] - self._right_inp[0],
            self._left_inp[0], self._right_inp[0]
        ]
    return res
def train_network(self, train_dl: DataLoader, epochs: int = 1, lr: float = 0.005):
    optimizer = torch.optim.Adam(self.parameters(), lr=lr)
    loss_function = nn.NLLLoss()
    training_results = []
    for epoch in tnrange(epochs):
        for i, batch in enumerate(train_dl):
            x_batch, y_batch = batch
            y_batch = y_batch.long()
            # Reset optimizer
            optimizer.zero_grad()
            # Forward, backward then optimize
            outputs = self(x_batch)
            loss = loss_function(outputs, y_batch)
            loss.backward()
            optimizer.step()
            training_results.append(loss.item())
    return training_results
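# A hypothetical usage sketch, assuming `Classifier` is the nn.Module
# subclass that defines train_network above and whose forward returns
# log-probabilities (nn.NLLLoss expects log-softmax outputs):
# from torch.utils.data import DataLoader, TensorDataset
# X = torch.randn(128, 10)
# y = torch.randint(0, 3, (128,))
# train_dl = DataLoader(TensorDataset(X, y), batch_size=16, shuffle=True)
# model = Classifier()
# losses = model.train_network(train_dl, epochs=5, lr=0.005)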
def try_all(dat, Yname, multi):
    # Initialization variables
    Y = dat[Yname]  # dat["Airport"]
    if (Yname == "Airport"):
        Y = Y.replace(regex={1: 0, 2: 1})
    X = dat.drop(columns=Yname)  # k = 11
    AIC_list, BIC_list, Log_list, RSquare_list, feature_list = [], [], [], [], []
    numb_features = []

    # Looping over k = 1 to k = 11 features in X
    for k in tnrange(1, len(X.columns) + 1, desc='Loop...'):
        # Looping over all possible combinations: from 11 choose k
        for combo in itertools.combinations(X.columns, k):
            tmp_result = fit_log_reg(X.loc[:, list(combo)], Y, multi)  # Store temp result
            AIC_list.append(tmp_result[0])  # Append lists
            BIC_list.append(tmp_result[1])
            Log_list.append(tmp_result[2])
            RSquare_list.append(tmp_result[3])
            feature_list.append(combo)
            numb_features.append(len(combo))

    # Store in DataFrame
    df = pd.DataFrame({
        'numb_features': numb_features,
        'AIC': AIC_list,
        'BIC': BIC_list,
        'Loglikelihood': Log_list,
        'McFaddens R2': RSquare_list,
        'features': feature_list
    })
    return (df)
def read_data_bin(self, file_path, zero_one):
    self.inputs = []
    self.outputs = []
    with open(file_path, "rb") as file:
        self.samples_count, self.input_size, self.output_size = unpack(
            "iii", file.read(12))
        for i in tnrange(self.samples_count):
            self.inputs.append(
                unpack(
                    str(self.input_size) + "d",
                    file.read(self.double_size * self.input_size)))
            self.outputs.append(
                unpack(
                    str(self.output_size) + "d",
                    file.read(self.double_size * self.output_size)))
    self.inputs = np.array(self.inputs)
    self.outputs = np.array(self.outputs)
    self.min_value = self.inputs.min()
    self.max_value = self.inputs.max()
    if zero_one:
        self.inputs = self.scale_to_zero_one(self.inputs)
def save_photos_from_lmdb_to_png(self, moda_df, photos, photo_data):
    # This function matches the images in moda with the corresponding images
    # in the lmdb database and stores them as png in the train_dir
    # Note, the .id in moda_df doesn't reflect photos.id.
    # Use the following code for better intuition after running the for loop
    """
    img_id = moda_df.iloc[entry_idx].id
    print(img_id)
    idx = np.where(photos['id'] == img_id)
    print(int(idx[0]))
    photo = photos.iloc[idx]
    """
    entry_idx = 0
    for i in tnrange(moda_df.shape[0], desc='Saving images'):
        img_id = moda_df.iloc[entry_idx].id
        idx = int(np.where(photos['id'] == img_id)[0])
        photo = photos.iloc[idx]
        img_path = os.path.join(self.train_dir, "{:07d}.png".format(photo.id))
        if (not photo_data[photo.id] is None) and (not os.path.exists(img_path)):
            photo_data[photo.id].save(img_path, format="PNG")
        entry_idx += 1
    print('Processed: {} entries '.format(entry_idx))
    return "Photos saved at: {}".format(self.train_dir)
def download_data_date_range(stationID, start_d, end_d):
    try:
        start_date = datetime.strptime(start_d, '%b%Y')
        end_date = datetime.strptime(end_d, '%b%Y')
        frames = []
        ran = [
            dt for dt in rrule.rrule(
                rrule.MONTHLY, dtstart=start_date, until=end_date)
        ]
        ran_len = len(ran)
        for i in tnrange(ran_len, desc='Downloading Data'):
            sleep(0.01)
            df = getHourlyData(stationID, ran[i].year, ran[i].month)
            frames.append(df)
        weather_data = pd.concat(frames)
        weather_data['Date/Time'] = pd.to_datetime(weather_data['Date/Time'])
        weather_data['Temp (°C)'] = pd.to_numeric(weather_data['Temp (°C)'])
        return weather_data
    except Exception:
        print("INVALID INPUT. ENTER AN INTEGER FOR stationID, "
              "A STRING FOLLOWING THE FORMAT MonYEAR, i.e. Jun2015")
def compute_distances_two_loops(self, X):
    """
    Compute the distance between each test point in X and each training point
    in self.X_train using a nested loop over both the training data and the
    test data.

    Inputs:
    - X: A numpy array of shape (num_test, D) containing test data.

    Returns:
    - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
      is the Euclidean distance between the ith test point and the jth
      training point.
    """
    num_test = X.shape[0]
    num_train = self.X_train.shape[0]
    dists = np.zeros((num_test, num_train))
    # for i in range(num_test):
    for i in tqdm.tnrange(num_test, desc='Test items'):
        for j in range(num_train):
            ###################################################################
            # TODO:                                                           #
            # Compute the l2 distance between the ith test point and the jth #
            # training point, and store the result in dists[i, j]. You       #
            # should not use a loop over dimension.                          #
            ###################################################################
            dists[i][j] = ssd.euclidean(X[i], self.X_train[j])
            ###################################################################
            #                       END OF YOUR CODE                          #
            ###################################################################
    return dists
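# Not part of the original assignment code: a fully vectorized alternative
# (a sketch), using (x - y)^2 = x^2 + y^2 - 2xy to compute the same distance
# matrix without any Python loops:
def compute_distances_vectorized(X_train, X):
    test_sq = np.sum(X**2, axis=1).reshape(-1, 1)  # (num_test, 1)
    train_sq = np.sum(X_train**2, axis=1)          # (num_train,)
    cross = X @ X_train.T                          # (num_test, num_train)
    return np.sqrt(np.maximum(test_sq + train_sq - 2 * cross, 0))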
def elastic_net_alpha(data, label, ll, ul, step, ratio, weight, state):
    kf = KFold(n_splits=10, shuffle=True, random_state=state)
    X = data
    y = label
    r2 = []
    mse = []
    pred = []
    true = []
    ilist = []
    feature = []
    pbar = tnrange(step * 10, desc='loop')
    for i in np.linspace(ll, ul, step):
        r2_single = []
        mse_single = []
        pred_single = []
        true_single = []
        feature_single = []
        for train_index, test_index in kf.split(X):
            y_train, y_test = y[train_index], y[test_index]
            X_train_tmp, X_test_tmp = X[train_index], X[test_index]
            regr = ElasticNet(random_state=state, alpha=i, l1_ratio=ratio)
            regr.fit(X_train_tmp, np.ravel(y_train))
            feature_index = np.where(regr.coef_ > 0)[0]
            X_train = X_train_tmp[:, feature_index]
            X_test = X_test_tmp[:, feature_index]
            svr = svm.SVR(kernel='linear')
            svr.fit(X_train, np.ravel(y_train))
            y_test_pred = svr.predict(X_test)
            feature_single.append(feature_index)
            pred_single.append(y_test_pred)
            true_single.append(np.ravel(y_test))
            r2_single.append(r2_score(y_test, y_test_pred))
            mse_single.append(mean_squared_error(y_test, y_test_pred))
            pbar.update(1)
        r2.append(r2_single)
        mse.append(mse_single)
        pred.append(pred_single)
        true.append(true_single)
        feature.append(feature_single)
        ilist.append(i)
    r2 = np.array(r2)
    r2_mean = np.average(r2, axis=1, weights=weight)
    pbar.close()
    plt.figure()
    plt.plot(np.linspace(ll, ul, step), r2_mean)
    plt.xlabel('alpha')
    plt.ylabel('$R^2$')
    a = np.where(r2_mean == max(r2_mean))[0]
    pred = np.array(pred)[a[0]]
    true = np.array(true)[a[0]]
    r2 = r2[a[0]]
    mse = np.array(mse)[a[0]]
    feature = np.array(feature)[a[0]]
    a = ilist[a[0]]
    print('max r2_score=', np.max(r2_mean), ', corresponding alpha=', a)
    feature = feature[np.where(r2 == max(r2))][0]
    print('number of selected features:', len(feature))
    return pred, true, r2, mse, feature, a
def _combine_and_synthesize():
    df_comb = dd.read_csv('state_{}_puma_*_generated.csv'.format(STATE))
    df_next = df_comb[['tract', 'num_people', 'num_vehicles',
                       'household_id_x', 'serial_number', 'repeat_index']]
    df_next.compute().to_csv('combined_pop.csv')
    df = pd.read_csv('combined_pop.csv')
    df.num_people = df.num_people.replace('4+', 4).astype(int)
    # NOTE: the ['std'] lookups below expect df to carry per-tract aggregated
    # columns (e.g. from a groupby('tract').agg(...)); that step is not shown
    # in this snippet.
    tract_sd = pd.concat([df['num_people']['std'], df['num_vehicles']['std']], axis=1)
    tract_sd.columns = ['hh_sd', 'car_sd']
    tract_gdf = gpd.read_file(
        "input/sample_data/tl_2016_{}_tract/tl_2016_{}_tract.shp".format(STATE, STATE))
    tract_sd_df = pd.DataFrame(tract_sd)
    tract_sd_df['tract'] = tract_sd_df.index
    df = df.drop(['Unnamed: 0', 'serial_number', 'repeat_index', 'person_id'], axis=1)
    df.drop_duplicates(inplace=True)

    def compute_row(i):
        row = df.iloc[i]
        tract_no = row.tract
        pt = get_random_point_in_polygon(
            tract_gdf[(tract_no == tract_gdf.tract)].geometry.values[0])
        return np.array([str(row.household_id_x), int(row.num_people),
                         int(row.num_vehicles), float(pt.x), float(pt.y)])

    res = []
    for i in tnrange(df.shape[0], desc='1st loop'):
        res.append(compute_row(i))
    out_df = dd.from_array(res).compute()
    out_df.to_csv("output/hhOut.csv", header=False, index=False)
def train_cosine(epochs, dl_train, model, crit, optim, sched_lens=None, dl_val=None):
    '''sched_lens can be a single int or a list'''
    n = len(dl_train.dataset)
    sched = torch.optim.lr_scheduler.CosineAnnealingLR(optim, n)
    result = {'loss_train': []}
    if dl_val:
        result['loss_val'] = []
    lr_history = []
    if sched_lens is None:
        sched_lens = 1
    if isinstance(sched_lens, int):
        sched_lens = [sched_lens] * epochs
    if len(sched_lens) < epochs:
        sched_lens += [sched_lens[-1]] * (epochs - len(sched_lens))
    for epoch in tnrange(epochs):
        print(f'{epoch+1}/{epochs}:', end='')
        for k in result:
            train = k == 'loss_train'
            dl = dl_train if train else dl_val
            running_loss = 0.0
            cycles = sched_lens[epoch] if train else 1
            sched.T_max = n * cycles
            for _ in range(cycles):
                for data in dl:
                    if train:
                        sched.step()
                        lr_history += sched.get_lr()
                    loss = step(model, crit, optim, data, train)
                    running_loss += loss * data[0].size(0)
            result[k] += [running_loss / len(dl.dataset) / cycles]
            print(f' {"train" if train else "val"}({result[k][-1]:0.4f})', end='')
        print()
        sched.last_epoch = -1
    return result, lr_history
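# `step` is not shown; a minimal sketch consistent with how it is called
# above (returns the batch loss as a float, and only backpropagates when
# train is True):
def step(model, crit, optim, data, train):
    x, y = data
    out = model(x)
    loss = crit(out, y)
    if train:
        optim.zero_grad()
        loss.backward()
        optim.step()
    return loss.item()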
def store_data_in_soup_frames(province, start_year, max_pages):
    invalid_msg = ("INVALID INPUT\n\nENTER A PROVINCE AS A STRING:\n"
                   "'BC','PE','NS','NL','NB','QC','ON','MB','SK','AB','YT','NT','NU'\n"
                   "ENTER YEAR AS A STRING:'1992'\n"
                   "ENTER A MAXIMUM NUMBER OF PAGES AS AN INTEGER, i.e. 1,2,3,4,5")
    try:
        if (province in {'BC', 'PE', 'NS', 'NL', 'NB', 'QC', 'ON', 'MB', 'SK', 'AB', 'YT', 'NT', 'NU'}
                and type(start_year) == str and len(start_year) == 4
                and type(max_pages) == int):
            # Store each page in a list and parse them later
            soup_frames = []
            for i in tnrange(max_pages, desc='Downloading Data'):
                startRow = 1 + i * 100
                sleep(0.01)
                base_url = "http://climate.weather.gc.ca/historical_data/search_historic_data_stations_e.html?"
                queryProvince = "searchType=stnProv&timeframe=1&lstProvince={}&optLimit=yearRange&".format(province)
                queryYear = "StartYear={}&EndYear=2017&Year=2017&Month=5&Day=29&selRowPerPage=100&txtCentralLatMin=0&txtCentralLatSec=0&txtCentralLongMin=0&txtCentralLongSec=0&".format(start_year)
                queryStartRow = "startRow={}".format(startRow)
                # Using requests to read the HTML source
                response = requests.get(base_url + queryProvince + queryYear + queryStartRow)
                # Parse with Beautiful Soup
                soup = BeautifulSoup(response.text, 'html.parser')
                soup_frames.append(soup)
            return soup_frames
        else:
            print(invalid_msg)
    except Exception:
        print(invalid_msg)
def generate_pandas_dataframe_from_soups(soup_frames):
    # Empty list to store the station data
    station_data = []
    for i in tnrange(len(soup_frames), desc='Generating Pandas DataFrames'):  # For each soup
        sleep(0.01)
        # We find the forms with the stnRequest* ID using regex
        forms = soup_frames[i].findAll("form", {"id": re.compile('stnRequest*')})
        for form in forms:
            try:
                # The stationID is a child of the form
                station = form.find("input", {"name": "StationID"})['value']
                # The station name is a sibling of the input element named lstProvince
                name = form.find("input", {"name": "lstProvince"}).find_next_siblings("div")[0].text
                # The intervals are listed as children in a 'select' tag named timeframe
                timeframes = form.find("select", {"name": "timeframe"}).findChildren()
                intervals = [t.text for t in timeframes]
                # We can find the min and max year of this station using the first and last child
                years = form.find("select", {"name": "Year"}).findChildren()
                min_year = years[0].text
                max_year = years[-1].text
                # Store the data in an array
                data = [station, name, intervals, min_year, max_year]
                station_data.append(data)
            except Exception:
                pass
    # Create a pandas dataframe using the collected data and give it the
    # appropriate column names
    stations_df = pd.DataFrame(
        station_data,
        columns=['StationID', 'Name', 'Intervals', 'Year Start', 'Year End'])
    return stations_df
def subcatToMaincat(df_in):
    '''Map each row's subCat code to a main category label.
    Input: df_in - dataframe with a 'subCat' column
    '''
    df_in['mainCat'] = ''
    for irow in tnrange(df_in.shape[0]):
        if df_in.at[irow, 'subCat'] in ['3', '9', '15']:
            df_in.at[irow, 'mainCat'] = 'maindish'
        elif df_in.at[irow, 'subCat'] in [
                '6', '7', '11', '12', '13', '14', '16', '17', '18', '19', '20'
        ]:
            df_in.at[irow, 'mainCat'] = 'sidedish'
        elif df_in.at[irow, 'subCat'] in ['4', '5', '8', '10']:
            df_in.at[irow, 'mainCat'] = 'dessert'
        elif df_in.at[irow, 'subCat'] in ['1']:
            df_in.at[irow, 'mainCat'] = 'condiments'
        elif df_in.at[irow, 'subCat'] in ['2']:
            df_in.at[irow, 'mainCat'] = 'salad'
    return df_in
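# An equivalent vectorized alternative (a sketch, not in the original):
# building the code-to-category dict once and using Series.map avoids the
# per-row loop entirely.
def subcat_to_maincat_vectorized(df_in):
    mapping = {}
    mapping.update({c: 'maindish' for c in ['3', '9', '15']})
    mapping.update({c: 'sidedish' for c in
                    ['6', '7', '11', '12', '13', '14', '16', '17', '18', '19', '20']})
    mapping.update({c: 'dessert' for c in ['4', '5', '8', '10']})
    mapping['1'] = 'condiments'
    mapping['2'] = 'salad'
    df_in['mainCat'] = df_in['subCat'].map(mapping).fillna('')
    return df_in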
def parameter_study_var(a, b, c, d, e, f, R, filename):
    '''
    Creates an array of all possible combinations for the ranges provided in
    the inputs for the three different variables and runs the model for each
    combination

    Inputs:
    a: start value for Initial Water
    b: end value for Initial Water
    c: start value for Remaining Water
    d: end value for Remaining Water
    e: start value for Outgassing
    f: end value for Outgassing
    R: fractionation factor
    filename: name of file to save dataframe of results to (must be in the
        form 'filename.csv')

    Output:
    A dataframe with all the successful runs of the model; the dataframe
    contains columns of the Initial amount, Remaining amount, Outgassed
    amount, DH ratio and Escape values listed
    '''
    # initial H2O total number of points
    n1 = b - a
    # remaining H2O total number of points
    n2 = d - c
    # outgassed H2O total number of points
    n3 = f - e
    # creating an array for each variable individually spaced by one unit m GEL
    init = np.linspace(a, b, n1 + 1)
    rem = np.linspace(c, d, n2 + 1)
    outg = np.linspace(e, f, n3 + 1)
    # Combining the three arrays to make one large array that contains all
    # possible combinations of the three variables in the provided ranges
    vals = []
    for i in range(0, n1 + 1):
        for j in range(0, n2 + 1):
            for k in range(0, n3 + 1):
                vals.append([init[i], rem[j], outg[k], R])
    # create an empty array to store the results of the parameter study in
    result = []
    # looping through the parameter space as listed in vals and running the
    # model for each entry in vals
    for i in tnrange(len(vals)):  # tnrange shows a live progress-bar in the notebook
        enrichment, escape = loop_var_box_model(*vals[i])
        # checking result of model against success criteria:
        # if enrichment matches current atmosphere, append entry to result,
        # otherwise discard
        if 5 <= enrichment <= 6:
            result.append([vals[i][0], vals[i][1], vals[i][2], enrichment, escape])
    # convert to pandas dataframe
    Dataframe_result = pd.DataFrame(result)
    Dataframe_result.columns = ['Initial', 'Remainder', 'Outgassed', 'DH', 'Escape']
    # save dataframe to csv file externally
    Dataframe_result.to_csv(filename)
    return Dataframe_result
def preview(self, update=False, ncol=-1, max_width=400, max_height=400):
    """ generate collective image of thumbnails
    ------------------------------------------------
    update: generate preview image from scratch
    ncol: number of columns in preview image
    max_width, max_height: size of each thumbnail
    """
    if 'cv2' not in dir():
        import cv2
    tmp_filename = 'tmp_preview.png'
    # read from previous cache
    if (not update) and (ncol == -1) and os.path.exists(tmp_filename):
        preview_img = cv2.imread(tmp_filename, 0)  # read as gray scale
        ratio = preview_img.shape[0] / preview_img.shape[1]
        plt.figure(figsize=(16, int(16 * ratio)))
        plt.imshow(preview_img, cmap='gray')
        plt.axis('off')
        return
    # prepare mean figures
    if ncol == -1:
        ncol = 6
    img_arr = []
    frameN_arr = []
    N = len(self._filelist)
    for i in tnrange(N):
        tif = ImageBase(self._filelist[i])
        im = _resize_image(tif.tmean(), max_width - 2, max_height - 2)
        # add new images
        img_arr.append(im)
        frameN_arr.append(tif._meta.N())
    # prepare output figure
    w, h = max_width, max_height
    nrow = int(np.ceil(N / ncol))
    res = np.ones((max_height * nrow, max_width * ncol), dtype=np.uint8) * 255
    if self._debug:
        print('... {} files, {} columns, {} rows, {} x {} pixels'.format(
            N, ncol, nrow, res.shape[0], res.shape[1]))
    # rescale and put in correct position
    for j in range(nrow):
        for i in range(ncol):
            tif_idx = i + j * ncol
            if tif_idx < self._fileN:
                # get a new image dimension
                ih, iw = img_arr[tif_idx].shape
                # save in a new array
                res[j * h + 1:j * h + ih + 1, i * w + 1:i * w + iw + 1] = img_arr[tif_idx]
                cv2.putText(res, '%i (%i)' % (tif_idx, frameN_arr[tif_idx]),
                            (i * w + 2, j * h + 27), 2, 1.0, (255, 255, 255), 1,
                            cv2.LINE_AA)
                if self._debug:
                    print('... [%i] %s' % (tif_idx, self._filelist[tif_idx]))
    plt.figure(figsize=(16, int(16.0 * nrow / ncol)))
    plt.imshow(res, cmap='gray')
    plt.axis('off')
    if self._debug:
        print('... save to %s' % tmp_filename)
    cv2.imwrite(tmp_filename, res)
    plt.show()
def omp(data, label, ll, ul, step, weight, state):
    kf = KFold(n_splits=10, shuffle=True, random_state=state)
    X = data
    y = label
    r2 = []
    mse = []
    pred = []
    true = []
    ilist = []
    feature = []
    pbar = tnrange(step * 10, desc='loop')
    for i in np.linspace(ll, ul, step).astype(int):
        r2_single = []
        mse_single = []
        pred_single = []
        true_single = []
        feature_single = []
        for train_index, test_index in kf.split(X):
            y_train, y_test = y[train_index], y[test_index]
            X_train_tmp, X_test_tmp = X[train_index], X[test_index]
            clf = OrthogonalMatchingPursuit(n_nonzero_coefs=i, normalize=False)
            clf.fit(X_train_tmp, np.ravel(y_train))
            feature_index = np.where(clf.coef_ > 0)[0]
            X_train = X_train_tmp[:, feature_index]
            X_test = X_test_tmp[:, feature_index]
            svr = svm.SVR(kernel='linear')
            svr.fit(X_train, np.ravel(y_train))
            y_test_pred = svr.predict(X_test)
            feature_single.append(feature_index)
            pred_single.append(y_test_pred)
            true_single.append(np.ravel(y_test))
            r2_single.append(r2_score(y_test, y_test_pred))
            mse_single.append(mean_squared_error(y_test, y_test_pred))
            pbar.update(1)
        r2.append(r2_single)
        mse.append(mse_single)
        pred.append(pred_single)
        true.append(true_single)
        feature.append(feature_single)
        ilist.append(i)
    r2 = np.array(r2)
    r2_mean = np.average(r2, axis=1, weights=weight)
    pbar.close()
    plt.figure()
    plt.plot(np.linspace(ll, ul, step), r2_mean)
    plt.xlabel('non-zero coefficients')
    plt.ylabel('$R^2$')
    a = np.where(r2_mean == max(r2_mean))[0]
    pred = np.array(pred)[a[0]]
    true = np.array(true)[a[0]]
    r2 = r2[a[0]]
    mse = np.array(mse)[a[0]]
    feature = np.array(feature)[a[0]]
    a = ilist[a[0]]
    print('max r2_score=', np.max(r2_mean), ', number of non-zero coefs=', a)
    feature = feature[np.where(r2 == max(r2))][0]
    print('number of selected features:', len(feature))
    return pred, true, r2, mse, feature
def fit(self):
    """
    Trains and evaluates the given model, checkpointing on the best
    validation loss and stopping early once patience is exhausted.
    """
    best_valid_loss = float('inf')
    counter = 0
    for epoch in tnrange(0, self.epochs):
        tqdm_t = tqdm(iter(self.train_dl), leave=False, total=self.train_dlen)
        tqdm_v = tqdm(iter(self.val_dl), leave=False, total=self.val_dlen)
        train_loss, train_acc = self.train(self.model, tqdm_t, self.opt, self.loss_fn)
        valid_loss, valid_acc, _, _ = self.evaluate(self.model, tqdm_v, self.loss_fn)
        if valid_loss < best_valid_loss:
            self.save_checkpoint()
            best_valid_loss = valid_loss
            counter = 0
            self.logger.info("Best model saved!!!")
        else:
            counter += 1
        self.logger.info(
            f'Epoch: {epoch+1} | Train Loss: {train_loss:.3f} | '
            f'Train Acc: {train_acc*100:.4f}% | Val. Loss: {valid_loss:.3f} | '
            f'Val. Acc: {valid_acc*100:.4f}%')
        if counter >= self.early_max_patience:
            self.logger.info("Training stopped because maximum tolerance reached!!!")
            break
def challenge_evaluate_performance(fn):
    score = 0
    for i in tnrange(8, desc="Total"):
        wave = load_wave("data/secret_tests/challenge_valid_%d" % i)
        labels = true_labels[i]
        pred_labels = fn(wave)
        for j in range(3):  # best of 3!
            score += test_classification_score(wave, labels, pred_labels)
        for j in tqdm_notebook(range(40), desc='Test case %d' % i):
            sleep(0.1)
    print("*** Total score: %.2f ***" % score)
    return score
def star_disrupt(self, reb_sim, time):
    m_hole = sc.m_hole
    # Randomly drawn mass of star
    xstar = rnd.random()
    m_star = mstar_dist(xstar)
    self.star_masses.append(m_star)
    # Determined radius of star from stellar mass
    r_star = rstar_func(m_star) * sc.RsuntoAU
    self.star_radii.append(r_star)
    # Distance spread for fragments
    rads = [r_star * float(f) / float(self.Nfrag + 1) for f in range(self.Nfrag + 1)]
    rads.pop(0)
    # Determined tidal radius of star
    r_t = r_tidal(m_star, r_star)
    self.tidal_radii.append(r_t)
    self.orbital_vels.append(2.0 * m_hole / r_t)
    # Set position of star; random sphere point picking
    u1 = rnd.uniform(-1.0, 1.0)
    th1 = rnd.uniform(0., 2. * np.pi)
    star_direc = np.array([sqrt(1.0 - (u1)**2) * cos(th1),
                           sqrt(1.0 - (u1)**2) * sin(th1), u1])
    star_vec = [r_t * d for d in star_direc]
    # Binding energy spread, with beta value randomly drawn from
    # beta distribution
    xbeta = rnd.random()
    beta = beta_dist(xbeta)
    NRGs = self.dmde.energy_spread(beta, self.Nfrag)
    # Converted NRGs list from cgs to proper units
    natural_u = (u.AU / (u.yr / (2.0 * np.pi)))**2
    nrg_scale = ((r_star * sc.AUtoRsun)**(-1.0) * (m_star)**(2.0 / 3.0) *
                 (m_hole / 1.0e6)**(1.0 / 3.0))
    energies = [(nrg_scale * nrg * (u.cm / u.second)**2).to(natural_u).value
                for nrg in NRGs]
    # Calculating velocities
    vels = [sqrt((2.0 * g) + (2.0 * m_hole / r_t)) for g in energies]
    # Randomly draw velocity vector direction
    phi2 = rnd.uniform(0., 2. * np.pi)
    x = star_vec[0]
    y = star_vec[1]
    z = star_vec[2]
    r = np.linalg.norm(star_vec)
    randomvelvec = [
        (x * (r - z + z * cos(phi2)) - r * y * sin(phi2)) / (r**2 * sqrt(2.0 - 2.0 * z / r)),
        (y * (r - z + z * cos(phi2)) + r * x * sin(phi2)) / (r**2 * sqrt(2.0 - 2.0 * z / r)),
        ((r - z) * z - (x**2 + y**2) * cos(phi2)) / (r**2 * sqrt(2.0 - 2.0 * z / r))
    ]
    velocity_vec = np.cross(star_vec, randomvelvec)
    n = np.linalg.norm(velocity_vec)
    vel_direc = [v / n for v in velocity_vec]
    for frag in tnrange(self.Nfrag, desc='Fragment', leave=False):
        # Velocity vector of fragment
        vel = vels[frag]
        frag_velvec = [vel * v for v in vel_direc]
        # Position vector of fragment
        rad = rads[frag]
        frag_posvec = [(r_t + rad) * p for p in star_direc]
        # Add particle to rebound simulation
        reb_sim.add(m=0.0, x=frag_posvec[0], y=frag_posvec[1], z=frag_posvec[2],
                    vx=frag_velvec[0], vy=frag_velvec[1], vz=frag_velvec[2])
        self.sfindices.append(reb_sim.N - 1)
    print('Star disrupted, t= {0}'.format(time))
    print('Number of particles: {0}'.format(reb_sim.N))
    print(self.sfindices)
# Predict on train, val and test
model = load_model('model-tgs-salt-1.h5',
                   custom_objects={'competitionMetric2': competitionMetric2,
                                   'iou_loss_core': iou_loss_core,
                                   'castB': castB,
                                   'castF': castF})
preds_train = model.predict(X_train[:int(X_train.shape[0] * 0.9)], verbose=1)
preds_val = model.predict(X_train[int(X_train.shape[0] * 0.9):], verbose=1)
preds_test = model.predict(X_test, verbose=1)

# Threshold predictions
preds_train_t = (preds_train > 0.5).astype(np.uint8)
preds_val_t = (preds_val > 0.5).astype(np.uint8)
preds_test_t = (preds_test > 0.5).astype(np.uint8)

# Create list of upsampled test masks
preds_test_upsampled = []
for i in tnrange(len(preds_test)):
    preds_test_upsampled.append(resize(np.squeeze(preds_test[i]),
                                       (sizes_test[i][0], sizes_test[i][1]),
                                       mode='constant', preserve_range=True))

preds_test_upsampled[0].shape


def RLenc(img, order='F', format=True):
    bytes = img.reshape(img.shape[0] * img.shape[1], order=order)
    runs = []  # list of run lengths
    r = 0      # the current run length
    pos = 1    # count starts from 1 per WK
    for c in bytes:
        if c == 0:
            if r != 0:
                runs.append((pos, r))
                pos += r
                r = 0
            pos += 1
        else:
            r += 1
    if r != 0:
        runs.append((pos, r))
    if format:
        return ' '.join('{} {}'.format(p, l) for p, l in runs)
    else:
        return runs
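# A possible decoder counterpart (a sketch, not part of the original
# snippet), inverting RLenc's formatted output back into a binary mask:
def RLdec(rle_string, shape, order='F'):
    mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    tokens = list(map(int, rle_string.split()))
    for pos, run in zip(tokens[0::2], tokens[1::2]):
        mask[pos - 1:pos - 1 + run] = 1  # positions are 1-based
    return mask.reshape(shape, order=order)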
def runTFSimul(self):
    #################################################################################
    ### INITIALISATION
    #################################################################################
    N = self.N
    NI = self.NI
    NE = self.NE
    T = self.T
    with tf.device(self.device):
        scaling = 1 / (1 / (2 * 2 / self.dt)) ** 0.5 * 70
        with tf.name_scope('membrane_var'):
            # Create variables for simulation state
            u = self.init_float([N, 1], 'u')
            # v = self.init_float([N, 1], 'v')
            v = tf.Variable(tf.random_normal([N, 1], mean=-50, stddev=30, name='v'))
            # currents
            iBack = self.init_float([N, 1], 'iBack')
            iChem = self.init_float([N, 1], 'iChem')
            I = tf.Variable(tf.zeros([N, 1]), name='I')
            input = tf.cast(tf.constant(self.input, name="input"), tf.float32)

        with tf.name_scope('spiking_bursting'):
            LowSp = self.init_float([N, 1], 'bursting')
            vv = self.init_float([N, 1], 'spiking')

        with tf.name_scope('monitoring'):
            vmE = self.init_float([T], "vm")
            vmI = self.init_float([T], "vm")
            umE = self.init_float([T], "um")
            umI = self.init_float([T], "um")
            vvmE = self.init_float([T], "vvm")
            vvmI = self.init_float([T], "vvm")
            pmE = self.init_float([T], "pm")
            pmI = self.init_float([T], "pm")
            lowspm = self.init_float([T], "lowspm")
            imE = self.init_float([T], "imE")
            imI = self.init_float([T], "imI")
            icmE = self.init_float([T], "icmE")
            icmI = self.init_float([T], "icmI")
            gm = self.init_float([T // self.weight_step + 1], "gm")
            iEffm = self.init_float([T], "iEffm")
            spikes = self.init_float([T, N], "spikes")

        with tf.name_scope('synaptic_connections'):
            # synaptic connections
            # conn = tf.constant(np.ones((N, N), dtype='float32') - np.diag(np.ones((N,), dtype='float32')))
            conn, connEE, connII, connEI, connIE = makeConn(N, NE=NE, NI=NI)
            vectE, vectI = makeVect(N, NE=NE, NI=NI)
            self.conn = conn.eval()
            nbOfGaps = NI * (NI - 1)
            if self.g0fromFile:
                self.g = getGSteady(self.tauv, 5, 1000)
            g0 = self.g / (nbOfGaps ** 0.5)
            wGap_init = tf.random_normal((N, N), mean=g0, stddev=g0 / 2,
                                         dtype=tf.float32, seed=None, name=None)
            wII_init = self.wII / ((NI * (NI - 1)) ** 0.5) / self.dt
            if NE > 0:
                wEE_init = self.wEE / ((NE * (NE - 1)) ** 0.5) / self.dt
            else:
                wEE_init = 0
            wIE_init = self.wIE / (NI * NE - 1) ** 0.5 / self.dt
            wEI_init = self.wEI / (NI * NE - 1) ** 0.5 / self.dt
            print('wII, wEE', wII_init, wEE_init)
            wGap = tf.Variable(tf.mul(wGap_init, connII))
            WII = tf.Variable(tf.mul(wII_init, connII))
            WEE = tf.Variable(tf.mul(wEE_init, connEE))
            WEI = tf.Variable(tf.mul(wEI_init, connEI))
            WIE = tf.Variable(tf.mul(wIE_init, connIE))
            # plasticity learning rates
            A_LTD_ = 2.45e-5 * self.FACT * 400 / N
            A_LTD = tf.constant(A_LTD_, name="A_LTP", dtype=tf.float32)
            A_LTP = tf.constant(self.ratio * A_LTD_, name="A_LTD", dtype=tf.float32)

        with tf.name_scope("simulation_params"):
            # stimulation
            TImean = tf.constant(self.nu * 1.0, name="mean_input_current", dtype=tf.float32)
            if self.nuI != self.nuE:
                TImean = (tf.constant(self.nuI * 1.0, name="mean_input_current",
                                      dtype=tf.float32) * vectI +
                          tf.constant(self.nuE * 1.0, name="mean_input_current",
                                      dtype=tf.float32) * vectE)
            # timestep
            dt = tf.constant(self.dt * 1.0, name="timestep", dtype=tf.float32)
            tauv = tf.constant(self.tauv * 1.0, dtype=tf.float32)
            startPlast = self.startPlast
            weight_step = self.weight_step
            sim_index = tf.Variable(0.0, name="sim_index")
            one = tf.Variable(1.0)
            ones = tf.ones((1, N))

    #################################################################################
    ## Computation
    #################################################################################
    with tf.device(self.device):
        with tf.name_scope('Currents'):
            # Discretized PDE update rules
            ps([WII, vv, vectI])
            iChem_ = (iChem +
                      dt / 10 * (-iChem + tf.matmul(WII + WEI, tf.to_float(vv))) +
                      dt / 40 * (-iChem + tf.matmul(WEE + WIE, tf.to_float(vv))))
            # current
            iBack_ = iBack + dt / 5 * (-iBack + tf.random_normal(
                (N, 1), mean=0.0, stddev=1.0, dtype=tf.float32, seed=None, name=None))
            input_ = input[tf.to_int32(sim_index)]
            # input to network: colored noise + external input
            iEff_ = iBack_ * scaling + input_ + TImean
            iGap_ = (tf.matmul(wGap, v) -
                     tf.mul(tf.reshape(tf.reduce_sum(wGap, 0), (N, 1)), v)) * vectI
            I_ = iGap_ + iChem_ + iEff_
            ps([I_, iGap_, iEff_, input_, iBack_, iChem_])

        # IZHIKEVICH
        with tf.name_scope('Izhikevich'):
            # voltage
            v_ = (tf.mul(v + dt / tauv * (tf.mul((v + 60), (v + 50)) - 20 * u + 8 * I_), vectI) +
                  tf.mul(v + dt / 10 * (0.7 * (v + 60) * (v + 40) - u + I_), vectE))
            # adaptation
            u_ = (u + tf.mul(dt * 0.044 * (v_ + 55 - u), vectI) +
                  tf.mul(dt * 0.03 * (-2 * (v + 60) - u), vectE))
            # spikes
            vv_ = (tf.mul(tf.to_float(tf.greater(v_, 25.0)), vectI) +
                   tf.mul(tf.to_float(tf.greater(v_, 35.0)), vectE))
            # reset
            v_ = tf.mul(vv_, -40.0) * vectI + tf.mul(vv_, -50.0) * vectE + tf.mul((1 - vv_), v_)
            u_ = u_ + 50 * vv_ * vectI + 100 * vectE * vv_

        # bursting
        with tf.name_scope('bursting'):
            LowSp_ = LowSp + dt / 8.0 * (vv_ * 8.0 / dt - LowSp)
            p_ = tf.to_float(tf.greater(LowSp_, 1.1))

        # plasticity
        with tf.name_scope('plasticity'):
            A = tf.matmul(p_, ones, name="bursts")  # bursts
            B = tf.matmul(vv_ * vectI, ones, name="spikes")  # spikes
            dwLTD_ = A_LTD * tf.add(A, tf.transpose(A, name="tr_bursts"))
            dwLTP_ = A_LTP * tf.add(B, tf.transpose(B, name="tr_spikes"))
            dwGap_ = dt * tf.sub(dwLTP_, dwLTD_)
            wGap_ = tf.clip_by_value(wGap + dwGap_, clip_value_min=0, clip_value_max=10 ** 10)

        # monitoring
        with tf.name_scope('Monitoring'):
            vvmeanE_ = tf.reduce_sum(vv_ * vectE)
            vvmeanI_ = tf.reduce_sum(vv_ * vectI)
            vmeanE_ = tf.reduce_mean(v_ * vectE)
            vmeanI_ = tf.reduce_mean(v_ * vectI)
            umeanE_ = tf.reduce_mean(u_ * vectE)
            umeanI_ = tf.reduce_mean(u_ * vectI)
            pmeanE_ = tf.reduce_mean(p_ * vectE)
            pmeanI_ = tf.reduce_mean(p_ * vectI)
            lowspmean_ = tf.reduce_mean(LowSp_)
            imeanE_ = tf.reduce_mean(I_ * vectE)
            imeanI_ = tf.reduce_mean(I_ * vectI)
            icmeanE_ = tf.reduce_mean(iChem_ * vectE)
            icmeanI_ = tf.reduce_mean(iChem_ * vectI)
            iEffm_ = tf.reduce_mean(iEff_)
            update = tf.group(
                tf.scatter_update(vvmE, tf.to_int32(sim_index), vvmeanE_),
                tf.scatter_update(vvmI, tf.to_int32(sim_index), vvmeanI_),
                tf.scatter_update(vmE, tf.to_int32(sim_index), vmeanE_),
                tf.scatter_update(vmI, tf.to_int32(sim_index), vmeanI_),
                tf.scatter_update(umE, tf.to_int32(sim_index), umeanE_),
                tf.scatter_update(umI, tf.to_int32(sim_index), umeanI_),
                tf.scatter_update(pmE, tf.to_int32(sim_index), pmeanE_),
                tf.scatter_update(pmI, tf.to_int32(sim_index), pmeanI_),
                tf.scatter_update(lowspm, tf.to_int32(sim_index), lowspmean_),
                tf.scatter_update(imE, tf.to_int32(sim_index), imeanE_),
                tf.scatter_update(imI, tf.to_int32(sim_index), imeanI_),
                tf.scatter_update(icmE, tf.to_int32(sim_index), icmeanE_),
                tf.scatter_update(icmI, tf.to_int32(sim_index), icmeanI_),
                tf.scatter_update(iEffm, tf.to_int32(sim_index), iEffm_),
                sim_index.assign_add(one),
            )

        with tf.name_scope('Weights_monitoring'):
            gm_ = tf.reduce_sum(wGap * connII)
            update_weights = tf.group(
                tf.scatter_update(gm, tf.to_int32(sim_index / weight_step), gm_),
            )

        with tf.name_scope('Raster_Plot'):
            spike_update = tf.group(
                tf.scatter_update(spikes, tf.to_int32(sim_index), tf.reshape((vv_), (N,))),
            )

        # Operation to update the state
        step = tf.group(
            iChem.assign(iChem_),
            iBack.assign(iBack_),
            LowSp.assign(LowSp_),
            v.assign(v_),
            vv.assign(vv_),
            u.assign(u_),
        )
        plast = tf.group(
            wGap.assign(wGap_),
        )
        # update_index = tf.group(
        #     sim_index.assign_add(one),
        # )

    # initialize the graph
    tf.global_variables_initializer().run()
    self.WII = WII.eval()
    self.WEE = WEE.eval()
    self.WIE = WIE.eval()
    self.WEI = WEI.eval()
    t0 = time.time()
    for i in tnrange(T):
        # Step simulation
        ops = {'plast': [step, plast, update],
               'static': [step, update]}
        if self.spikeMonitor:
            for k, v in ops.items():
                ops[k] = v + [spike_update]
        if i > startPlast:
            self.sess.run(ops['plast'])
        else:
            self.sess.run(ops['static'])
        if i % weight_step == 0:
            self.sess.run([update_weights])
        # self.sess.run([update_index])
        # Visualize every X steps
        if i % 1 == 0:
            if self.disp:
                clear_output(wait=True)
                self.DisplayArray(wGap.eval(), rng=[0, 1.5 * g0],
                                  text="%.2f ms" % (i * self.dt))
        if i == 0:
            self.w0 = wGap.eval()
        elif i == T - 1:
            self.wE = wGap.eval()

    # monitoring variables
    self.vvmE = vvmE.eval()
    self.vvmI = vvmI.eval()
    self.vmE = vmE.eval()
    self.vmI = vmI.eval()
    self.umE = umE.eval()
    self.umI = umI.eval()
    self.pE = pmE.eval()
    self.pI = pmI.eval()
    self.lowsp = lowspm.eval()
    self.imE = imE.eval()
    self.imI = imI.eval()
    self.icmE = icmE.eval()
    self.icmI = icmI.eval()
    self.iEff = iEffm.eval()
    self.gamma = gm.eval() / np.sum(nbOfGaps)
    if self.spikeMonitor:
        self.raster = spikes.eval()
    self.burstingActivity = np.mean(self.pI)
    self.spikingActivity = np.mean(self.vvmI)
    # if i == T // 2:
    #     x = tf.matmul(WEI, vv).eval()
    #     print(x.shape, x.min())
    #     plt.imshow(x)
    print('%.2f' % (time.time() - t0))
    self.sess.close()
def sim_integrate(self):
    m_hole = sc.m_hole
    star_masses = []
    star_radii = []
    tidal_radii = []
    for star in tnrange(self.Nstars, desc='Star', leave=False):
        # Randomly drawn mass of star
        xstar = rnd.random()
        m_star = mstar_dist(xstar)
        star_masses.append(m_star)
        # Determined radius of star
        r_star = rstar_func(m_star) * sc.RsuntoAU
        star_radii.append(r_star)
        # Distance spread for fragments
        rads = [r_star * float(f) / float(self.Nfrag + 1) for f in range(self.Nfrag + 1)]
        rads.pop(0)
        # Determined tidal radius of star
        r_t = r_tidal(m_star, r_star)
        tidal_radii.append(r_t)
        # Set position of star; random sphere point picking
        u1 = rnd.uniform(-1.0, 1.0)
        th1 = rnd.uniform(0., 2. * np.pi)
        star_direc = np.array([sqrt(1.0 - (u1)**2) * cos(th1),
                               sqrt(1.0 - (u1)**2) * sin(th1), u1])
        star_vec = [r_t * d for d in star_direc]
        # Binding energy spread, with beta value randomly drawn from
        # beta distribution
        xbeta = rnd.random()
        beta = beta_dist(xbeta)
        NRGs = self.dmde.energy_spread(beta, self.Nfrag)
        # Converted NRGs list from cgs to proper units
        pi = np.pi
        natural_u = (u.AU / (u.yr / (2.0 * pi)))**2
        nrg_scale = ((r_star * sc.AUtoRsun)**(-1.0) * (m_star)**(2.0 / 3.0) *
                     (m_hole / 1.0e6)**(1.0 / 3.0))
        energies = [(nrg_scale * nrg * (u.cm / u.second)**2).to(natural_u).value
                    for nrg in NRGs]
        # Calculating velocities
        vels = [sqrt((2.0 * g) + (2 * m_hole / r_t)) for g in energies]
        # Randomly draw velocity vector direction
        phi2 = rnd.uniform(0., 2. * np.pi)
        x = star_vec[0]
        y = star_vec[1]
        z = star_vec[2]
        r = np.linalg.norm(star_vec)
        randomvelvec = [
            (x * (r - z + z * cos(phi2)) - r * y * sin(phi2)) / (r**2 * sqrt(2.0 - 2.0 * z / r)),
            (y * (r - z + z * cos(phi2)) + r * x * sin(phi2)) / (r**2 * sqrt(2.0 - 2.0 * z / r)),
            ((r - z) * z - (x**2 + y**2) * cos(phi2)) / (r**2 * sqrt(2.0 - 2.0 * z / r))
        ]
        velocity_vec = np.cross(star_vec, randomvelvec)
        n = np.linalg.norm(velocity_vec)
        for frag in tnrange(self.Nfrag, desc='Fragment', leave=False):
            # Velocity vector of fragment
            vel = vels[frag]
            frag_velvec = [vel * v / n for v in velocity_vec]
            # Position vector of fragment
            rad = rads[frag]
            frag_posvec = [(r_t + rad) * p for p in star_direc]
            # Set up rebound simulation
            reb_sim = rebound.Simulation()
            reb_sim.integrator = "ias15"
            reb_sim.add(m=m_hole)
            reb_sim.dt = 1.0e-15
            # Add particle to rebound simulation
            reb_sim.add(m=0.0, x=frag_posvec[0], y=frag_posvec[1], z=frag_posvec[2],
                        vx=frag_velvec[0], vy=frag_velvec[1], vz=frag_velvec[2])
            reb_sim.N_active = 1
            reb_sim.additional_forces = self.migrationAccel
            reb_sim.force_is_velocity_dependent = 1
            reb_sim.exit_max_distance = 15.0 * sc.scale  # 15 pc in AU
            ps = reb_sim.particles
            stop = np.log10(self.max_time)
            times = np.logspace(-17.0, stop, self.Nout)
            times = np.insert(times, 0, 0.0)
            for ti, t in enumerate(times):
                try:
                    reb_sim.integrate(t, exact_finish_time=1)
                    self.posx[star][frag].append(ps[1].x / sc.scale)
                    self.posy[star][frag].append(ps[1].y / sc.scale)
                    self.posz[star][frag].append(ps[1].z / sc.scale)
                except rebound.Escape as error:
                    print(error)
                    break
                # Semi-major axis criterion:
                # Cuts particles closely bound to black hole
                if (2.0 * ps[1].a / sc.scale > 0.0 and
                        2.0 * ps[1].a / sc.scale < 1.0):
                    break