def simulate(n_simul, agents, grid_size, candy_ratio=1., max_iter=500):
    print("Simulations")
    wins = dict((id, 0.) for id in range(len(agents)))
    points = dict((id, []) for id in range(len(agents)))
    scores = dict((id, []) for id in range(len(agents)))
    iterations = []
    for it in range(n_simul):
        progressBar(it, n_simul)
        endState = controller(agents, grid_size, candy_ratio=candy_ratio,
                              max_iter=max_iter, verbose=0)
        if len(endState.snakes) == 1:
            winner = list(endState.snakes.keys())[0]
            wins[winner] += 1. / n_simul
            points[winner].append(list(endState.snakes.values())[0].points)
        for id in range(len(agents)):
            scores[id].append(endState.scores[id])
        iterations.append(endState.iter)
    progressBar(n_simul, n_simul)
    # Average points per agent; guard against agents that never won
    # (an empty list would otherwise raise ZeroDivisionError)
    points = dict((id, sum(val) / len(val) if val else 0.)
                  for id, val in points.items())
    return wins, points, scores, iterations
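# NOTE: the snippets in this collection call a console helper named
# `progressBar` that is not itself included. For the two-argument usage seen
# here and below (progressBar(i, total) / progressBar(i, total, info=...)),
# a minimal sketch might look like the following. This is an assumption for
# illustration, not the original projects' implementation.
import sys

def progressBar(iteration, total, info=""):
    # Redraw a single-line progress bar in place on stdout.
    width = 40
    filled = int(width * iteration / max(total, 1))
    bar = "#" * filled + "-" * (width - filled)
    sys.stdout.write("\r[{}] {}/{} {}".format(bar, iteration, total, info))
    sys.stdout.flush()
    if iteration >= total:
        sys.stdout.write("\n")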
def train_loop(self, dataset, epochs, total_steps):
    for epoch in range(epochs):
        for i, real_images in enumerate(dataset):
            noises = tf.random.normal(shape=(self.batch_size, self.z_dim))
            dis_loss = self.discriminator_train_step(noises, real_images)
            gen_loss = self.generator_train_step(noises)
            progressBar(epoch + 1, i + 1, total_steps,
                        dis_loss.numpy(), gen_loss.numpy())
            if (i + 1) % 50 == 0:
                self.generate_training_progress_result(
                    self.ckpt.generator, epoch + 1, i + 1)
        dis_loss_epoch = self.dis_metric.result().numpy()
        gen_loss_epoch = self.gen_metric.result().numpy()
        print()
        print(f"Epoch {epoch+1} - D-Loss: {dis_loss_epoch} - G-Loss: {gen_loss_epoch}")
        self.ckpt_manager.save()
        self.ckpt.epoch.assign_add(1)
        self.dis_metric.reset_states()
        self.gen_metric.reset_states()
def train(self, strategies, grid_size, num_trials=100, max_iter=1000, verbose=False):
    print("RL training")
    totalRewards = []  # The rewards we get on each trial
    rl_id = len(strategies)
    for trial in range(num_trials):
        progressBar(trial, num_trials)
        game = interface.Game(grid_size, len(strategies) + 1,
                              candy_ratio=1., max_iter=max_iter)
        state = game.startState()
        totalDiscount = 1
        totalReward = 0
        points = state.snakes[rl_id].points
        history = []
        while not game.isEnd(state) and rl_id in state.snakes:
            # Compute the actions for each player following its strategy
            actions = {i: strategies[i](i, state)
                       for i in state.snakes.keys() if i != rl_id}
            action, optimal_action = self.getAction(state)
            actions[rl_id] = action
            newState = game.succ(state, actions)
            if rl_id in newState.snakes:
                reward = newState.snakes[rl_id].points - points
                if len(newState.snakes) == 1:  # it won
                    reward += 10.
                points = newState.snakes[rl_id].points
                self.incorporateFeedback(state, action, reward, newState, history)
            else:  # it died
                reward = -10.
                self.incorporateFeedback(state, action, reward, newState, history)

            # Add decision to history, or reset it after a non-greedy choice
            if optimal_action:
                history.append(self.featureExtractor(state, action))
            else:
                history = []

            totalReward += totalDiscount * reward
            totalDiscount *= self.discount
            state = newState

        if verbose:
            print("Trial %d (totalReward = %s)" % (trial, totalReward))
        totalRewards.append(totalReward)
    progressBar(num_trials, num_trials)
    print("Average reward:", sum(totalRewards) / num_trials)
    return totalRewards
def train(self, opponents, grid_size, num_trials=100, max_iter=1000, verbose=False):
    print("RL training")
    totalRewards = []  # The rewards we get on each trial
    rl_agent = self.getAgent()
    agents = deepcopy(opponents)
    agents.append(rl_agent)  # add the current agent to the strategies
    for trial in range(num_trials):
        game = interface.Game(grid_size, len(agents),
                              candy_ratio=1., max_iter=max_iter)
        game.start(agents)
        totalDiscount = 1
        totalReward = 0
        rewards = []
        while not game.isEnd() and rl_agent.isAlive(game):
            # Each agent (including the RL agent) picks its own action
            actions = game.agentActions()
            newState = game.succ(game.current_state, actions)
            reward = rl_agent.lastReward(game)
            rewards.append(reward)
            totalReward += totalDiscount * reward
            totalDiscount *= self.discount

            if self.rl_type == "qlearning":
                self.incorporateFeedback(game.previous_state,
                                         actions[rl_agent.getPlayerId()],
                                         reward, game.current_state)

        if self.rl_type == "policy_gradients":
            self.addRolloutFeedback(rewards, trial)

        progressBar(trial, num_trials, info="Last reward: {}".format(totalReward))
        if verbose:
            print("Trial %d (totalReward = %s)" % (trial, totalReward))
        totalRewards.append(totalReward)
    progressBar(num_trials, num_trials)
    print("Average reward:", sum(totalRewards) / num_trials)
    return totalRewards
def fit(self, x, y, x_val, y_val, steps=10000, save_best_only=True):
    sess, saver = self.sess, self.saver
    train, cost = self.train, self.cost
    H_indices, W_indices, Y = self.H_indices, self.W_indices, self.Y
    epochs, cost_function, min_cost = self.epochs, self.cost_function, self.min_cost
    checkpoint, model_name = self.checkpoint, self.model_name
    for epoch in range(0, epochs + 1):
        for step in range(0, steps + 1):
            training_error = sess.run(cost, feed_dict={H_indices: x[0],
                                                       W_indices: x[1],
                                                       Y: y})
            validation_error = sess.run(cost, feed_dict={H_indices: x_val[0],
                                                         W_indices: x_val[1],
                                                         Y: y_val})
            progressBar(step, steps,
                        "Epoch : %d / %d, %s(training) : %.4f, %s(test) : %.4f"
                        % (epoch, epochs, cost_function, training_error,
                           cost_function, validation_error))
            sess.run(train, feed_dict={H_indices: x[0], W_indices: x[1], Y: y})
        if not save_best_only or min_cost > validation_error:
            filename = model_name + "%03d_%.4f" % (epoch, validation_error) + ".ckpt"
            model_path = os.path.join(checkpoint, filename)
            saver.save(sess, model_path)
            # Track the best validation error so that save_best_only actually
            # saves only on improvement (the original never updated min_cost)
            min_cost = validation_error
            print("Saved model %s" % filename)
    print("Done")
def main(NUMBERS, TARGET):
    n = len(NUMBERS)
    progress = 0
    solutionsGiven = 0
    # Size of the search space; see the attached PDF for the derivation
    SOL_SPACE_SIZE = n * (factorial(n - 1) ** 2) * (2 ** (n - 1))
    countdown.spinner(NUMBERS, TARGET)
    INDENT = "\t\t\t"
    with open("solutions.txt", "w") as file:
        solution_generator = solver(NUMBERS, TARGET)
        for solution in solution_generator:
            progress += 1
            if solution != "":
                if solutionsGiven < 100:  # keep at most the first 100 solutions
                    file.write(solution + "\n")
                    solutionsGiven += 1
            progressFractional = progress / SOL_SPACE_SIZE
            countdown.spinner(NUMBERS, TARGET)
            progressBar(INDENT, progressFractional, solution)
        # Uncomment the line below to see the number of operations that ran.
        # This number is exactly equal to the solution space calculated!
        # file.write(str(progress))
    with open("solutions.txt", "r") as file:
        solutions = file.readlines()
    if solutions == []:
        print("This problem is impossible!")
        return
    [print(line) for line in solutions[:100]]
def train_loop(self, dataset, epochs, total_steps):
    for epoch in range(epochs):
        start = time.time()
        for i, train_images in enumerate(dataset):
            loss = self.train_step(train_images)
            progressBar(epoch + 1, i + 1, total_steps, loss.numpy())
            if (i + 1) % 50 == 0:
                self.generate_training_progress_result(
                    self.ckpt.decoder, epoch + 1, i + 1)
        stop = time.time()
        print()
        print(f"EPOCH: {epoch+1} - LOSS: {self.vae_metric.result().numpy()} "
              f"- Time: {round(stop-start,2)}s")
        self.ckpt_manager.save()
        self.ckpt.epoch.assign_add(1)
        self.vae_metric.reset_states()
def updateTrading212Dailies(self):
    stock_list = pd.read_csv("Trading212US.csv")
    for symbol in progressBar(stock_list['Symbol'], "Updating Dailies: ",
                              "Complete", length=50, decimals=1):
        while True:
            try:
                data = self.stock_candles(
                    symbol, 'D',
                    time_to_unix(datetime(2019, 8, 1, 5, 0, 0)),
                    time_to_unix((datetime.today() - timedelta(1)).replace(hour=23)))
                break
            except finnhub.exceptions.FinnhubAPIException:
                # Back off and retry when the API rate limit is hit
                time.sleep(10)
        if data['s'] == 'no_data':
            continue
        data = self.candle_prep(data)
        data.to_csv("hist_data/{}_Daily.csv".format(symbol))
def get_intraday_minutes(self, watchlist, day=None, flag='today'):
    no_data = []
    watch_dict = {}
    end_close = {}
    day = day if day else datetime.today()
    if flag == 'yesterday':
        # Step back over the weekend when "yesterday" falls on a Monday
        delta = 3 if day.weekday() == 0 else 1
        day = day - timedelta(delta)
    save_path = Path("minute_data/" + day.date().isoformat())
    save_path.mkdir(parents=True, exist_ok=True)
    time_start = time_to_unix(day.replace(hour=15, minute=30, second=0))
    time_end = time_to_unix(day.replace(hour=22, minute=0, second=30))
    for symbol in progressBar(watchlist,
                              "Fetching intraday data of {}: ".format(flag),
                              length=50, decimals=1):
        symbol_csv = save_path.joinpath("{}.csv".format(symbol))
        if symbol_csv.is_file():
            watch_dict[symbol] = pd.read_csv(symbol_csv, index_col='Time',
                                             parse_dates=True)
            end_close[symbol] = watch_dict[symbol]['Price'].iloc[-1]
            continue
        while True:
            try:
                data = self.stock_candles(symbol, 1, time_start, time_end)
                break
            except (finnhub.exceptions.FinnhubAPIException,
                    requests.exceptions.RequestException) as e:
                print(e)
                time.sleep(5)
        if data['s'] == 'no_data':
            no_data.append(symbol)
            continue
        data = self.minutes_prep(data)
        watch_dict[symbol] = data
        end_close[symbol] = data['Price'].iloc[-1]
        data.to_csv(symbol_csv)
    if len(no_data) > 0:
        print("no intraday data found for these symbols: \n", no_data)
    return watch_dict, no_data, end_close
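# The two trading snippets above iterate over progressBar(...) directly, which
# implies a generator-style helper that wraps an iterable and redraws the bar
# on each yield (similar in spirit to tqdm). A minimal sketch under that
# assumption, matching the call shapes progressBar(iterable, prefix, suffix,
# length=..., decimals=...) seen above; the real helper may differ.
import sys

def progressBar(iterable, prefix="", suffix="", length=50, decimals=1):
    total = len(iterable)

    def draw(i):
        pct = ("{0:." + str(decimals) + "f}").format(100 * i / max(total, 1))
        filled = int(length * i // max(total, 1))
        bar = "█" * filled + "-" * (length - filled)
        sys.stdout.write("\r{} |{}| {}% {}".format(prefix, bar, pct, suffix))
        sys.stdout.flush()

    draw(0)
    for i, item in enumerate(iterable, 1):
        yield item      # hand the item to the caller's loop body
        draw(i)         # then update the bar once the body finishes
    sys.stdout.write("\n")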
sep=";") # Note that ID.txt file is delimited by semicolon psms = psms[["Peptide", "Outfile", "measuredMH", "XCorr"]] psms = psms.loc[psms["Outfile"].str.contains( mzXMLBaseName)] # Extract PSMs from FTLD_Batch2_F50.mzXML psms["precMz"] = np.nan psms["charge"] = np.nan psms["featureIndex"] = np.nan psms["category"] = "" psms = psms.drop_duplicates() print(" PSM information has been parsed\n") # Find the match between features and PSMs n1, n2 = 0, 0 # n1 = number of PSMs mapped to feature(s), n2 = number of PSMs not mapped to any feature isoWindow = 1 # Isolation window size for a precursor peak proton = 1.007276466812 progress = utils.progressBar(psms.shape[0]) for idx, psm in psms.iterrows(): progress.increment() [psmRunName, psmScanNum, _, psmZ, _] = os.path.basename(psm["Outfile"]).split(".") psms.loc[idx, "charge"] = psmZ if psmRunName == mzXMLBaseName: # Extract the precursor m/z psmScanNum = int(psmScanNum) surveyScanNum = ms2ToSurvey[psmScanNum] precMz, _, _ = getPrecursorPeak(reader, psmScanNum, surveyScanNum, params) # Assign the feature corresponding to the PSM, based on the precursor m/z and feature's m/z if precMz != -1: psms.loc[idx, "precMz"] = precMz
# Initialize error accumulators (assumed; this excerpt used mae_list/mse_list
# without defining them earlier)
mae_list = []
mse_list = []
accumulated_mae = 0
accumulated_mse = 0
print(test_size_ML, " points to process...")
horizon = 10
for i in range(0, test_size_ML - horizon - 1, test_size_ML // 100):
    x_train = test_data_ML.x[i].reshape(-1, window_size * dim)
    y_train = test_data_ML.y[i]
    temp_kernel = ConstantKernel(1.0, (1e-3, 1e3)) * RationalQuadratic() + WhiteKernel()
    gpr = GaussianProcessRegressor(kernel=temp_kernel, random_state=0)
    gpr.fit(x_train, y_train)
    y_pred = gpr.predict(
        test_data_ML.x[i + 1:i + horizon].reshape(-1, window_size * dim))
    temp_mae = mae(y_pred, test_data_ML.y[i + 1:i + horizon].reshape(-1, 1))
    temp_mse = mse(y_pred, test_data_ML.y[i + 1:i + horizon].reshape(-1, 1))
    mae_list.append(temp_mae)
    mse_list.append(temp_mse)
    # Running-average update keyed on the loop index i
    # (note i advances by the stride, not by 1)
    accumulated_mae = (i / (i + 1)) * accumulated_mae + temp_mae / (i + 1)
    accumulated_mse = (i / (i + 1)) * accumulated_mse + temp_mse / (i + 1)
    progressBar(i, test_size_ML, 100)
print("Accumulated_mae ", accumulated_mae)
print("Accumulated_mse ", accumulated_mse)
def detectFeatures(inputFile, paramFile):
    ##############
    # Parameters #
    ##############
    params = utils.getParams(paramFile)
    firstScan = int(params["first_scan_extraction"])
    lastScan = int(params["last_scan_extraction"])
    gap = int(params["skipping_scans"])
    scanWindow = gap + 1
    matchPpm = float(params["mass_tolerance_peak_matching"])

    ##################
    # Initialization #
    ##################
    reader = mzxml.read(inputFile)
    f = []  # Feature array
    nFeatures = -1
    cache = []
    noise = {}  # Empty dictionary for noise level information
    oldMinInd = -1
    oldMaxInd = -1

    ############################
    # Get MS1 scan information #
    ############################
    ms = []
    with reader:
        msCount = 0
        # filename = os.path.basename(inputFile)
        # print("  Extraction of MS1 spectra from %s" % filename)
        for spec in reader:
            msLevel = int(spec["msLevel"])
            scanNum = int(spec["num"])
            if msLevel == 1 and firstScan <= scanNum <= lastScan:
                ms.append(spec)
                msCount += 1
            elif scanNum > lastScan:
                break
        # print("  Done")

    ################################
    # Feature (3D-peak) generation #
    ################################
    filename = os.path.basename(inputFile)
    print("  Feature detection from %s" % filename)
    logging.info("  Feature detection from " + filename)
    progress = utils.progressBar(msCount)
    for i in range(msCount):
        progress.increment()
        minInd = max(0, i - gap - 1)
        maxInd = min(msCount - 1, i + gap + 1)
        if i == 0:
            for j in range(maxInd + 1):
                spec = detectPeaks(ms[j], params)
                spec["index"] = j
                cache.append(spec)
        else:
            for j in range(oldMinInd, minInd):
                cache.pop(0)  # Remove the first element in cache
            for j in range(oldMaxInd + 1, maxInd + 1):
                spec = detectPeaks(ms[j], params)
                spec["index"] = j
                cache.append(spec)

        ##################
        # Reduction step #
        ##################
        p = cache[i - minInd]
        pCount = len(p["m/z array"])
        valids = np.array([])
        count = 0
        for j in range(pCount):
            cm = p["m/z array"][j]
            match = 0
            nTry = 0
            # Backward search
            for k in range(i - 1, minInd - 1, -1):
                q = cache[k - minInd]
                if q["m/z array"].size == 0:
                    continue
                match, ind = getClosest(q, cm, matchPpm)
                if match == 1:
                    break
                nTry += 1
                if nTry > scanWindow:
                    break
            if match == 0:
                # Forward search
                nTry = 0
                for k in range(i + 1, maxInd + 1):
                    q = cache[k - minInd]
                    if q["m/z array"].size == 0:
                        continue
                    match, ind = getClosest(q, cm, matchPpm)
                    if match == 1:
                        break
                    nTry += 1
                    if nTry > scanWindow:
                        break
            if match == 1:
                valids = np.append(valids, j)

        # Peak reduction and noise-level estimation
        p, noise = reduceMS1(p, noise, valids)

        #####################
        # Peak merging step #
        #####################
        cache[i - minInd] = p
        pCount = len(p["m/z array"])
        for j in range(pCount):
            cm = p["m/z array"][j]
            match = 0
            nTry = 0
            matchedPeakInd = []
            # Backward search
            for k in range(i - 1, minInd - 1, -1):
                q = cache[k - minInd]
                if q["m/z array"].size == 0:
                    continue
                matchIndicator, ind = getClosest(q, cm, matchPpm)
                # matchIndicator = 1 means that the j-th (reduced) peak in the i-th scan
                # can form a 3D-peak with the ind-th (reduced) peak in the previous scan (q)
                if matchIndicator == 1:
                    matchedPeakInd.append(q["featureIndex"][ind])
                    match = 1
            if match == 1:
                matchedPeakInd = list(set(matchedPeakInd))  # Make the list unique
                fInd = None
                if len(matchedPeakInd) > 1:
                    # There are multiple matches to the peaks in previous scans
                    fInd = min(matchedPeakInd)
                    for m in matchedPeakInd:
                        # Merge to the lowest-indexed feature and remove the "merged" features
                        if m != fInd:
                            f[fInd]["mz"].extend(f[m]["mz"])
                            f[fInd]["intensity"].extend(f[m]["intensity"])
                            f[fInd]["num"].extend(f[m]["num"])
                            f[fInd]["rt"].extend(f[m]["rt"])
                            f[fInd]["index"].extend(f[m]["index"])
                            # Revise cache array
                            for s in f[m]["index"]:
                                for t in range(len(cache)):
                                    if cache[t]["index"] == s:
                                        for u in range(len(cache[t]["featureIndex"])):
                                            if cache[t]["featureIndex"][u] == m:
                                                cache[t]["featureIndex"][u] = fInd
                            f[m] = None  # Keep the size of the feature array
                else:
                    fInd = matchedPeakInd[0]
                if "featureIndex" in cache[i - minInd]:
                    cache[i - minInd]["featureIndex"].append(fInd)
                else:
                    cache[i - minInd]["featureIndex"] = [fInd]
                f[fInd]["mz"].append(p["m/z array"][j])
                f[fInd]["intensity"].append(p["intensity array"][j])
                f[fInd]["num"].append(p["num"])
                f[fInd]["rt"].append(p["retentionTime"])
                f[fInd]["index"].append(p["index"])
            if match != 1:
                if i < msCount:
                    nFeatures += 1
                    if "featureIndex" in cache[i - minInd]:
                        cache[i - minInd]["featureIndex"].append(nFeatures)
                    else:
                        cache[i - minInd]["featureIndex"] = [nFeatures]
                    f.append({"mz": [p["m/z array"][j]],
                              "intensity": [p["intensity array"][j]],
                              "num": [p["num"]],
                              "rt": [p["retentionTime"]],
                              "index": [i]})
        oldMinInd = minInd
        oldMaxInd = maxInd

    # Remove empty features
    f = [i for i in f if i is not None]

    #################################
    # Filtering features (3D-peaks) #
    #################################
    # A feature may contain multiple peaks from one scan;
    # in this case, the one with the largest intensity is chosen
    gMinRt, gMaxRt = 0, 0  # Global minimum and maximum RT over all features
    for i in range(len(f)):
        if len(f[i]["num"]) != len(list(set(f[i]["num"]))):
            temp = {}
            for j in range(len(f[i]["num"])):
                if f[i]["num"][j] in temp:
                    currIntensity = f[i]["intensity"][j]
                    if currIntensity > temp[f[i]["num"][j]]["intensity"]:
                        temp[f[i]["num"][j]]["intensity"] = currIntensity
                        temp[f[i]["num"][j]]["index"] = j
                else:
                    temp[f[i]["num"][j]] = {}
                    temp[f[i]["num"][j]]["intensity"] = f[i]["intensity"][j]
                    temp[f[i]["num"][j]]["index"] = j
            uInd = []
            for key in sorted(temp.keys()):
                uInd.append(temp[key]["index"])
            f[i]["mz"] = [f[i]["mz"][u] for u in uInd]
            f[i]["intensity"] = [f[i]["intensity"][u] for u in uInd]
            f[i]["num"] = [f[i]["num"][u] for u in uInd]
            f[i]["rt"] = [f[i]["rt"][u] for u in uInd]
            f[i]["index"] = [f[i]["index"][u] for u in uInd]
        if i == 0:
            gMinRt = min(f[i]["rt"])
            gMaxRt = max(f[i]["rt"])
        else:
            if min(f[i]["rt"]) < gMinRt:
                gMinRt = min(f[i]["rt"])
            if max(f[i]["rt"]) > gMaxRt:
                gMaxRt = max(f[i]["rt"])
    if gMaxRt.unit_info == "minute":
        gMaxRt = gMaxRt * 60
        gMinRt = gMinRt * 60

    ###################################
    # Organization of output features #
    ###################################
    n = 0
    ms1ToFeatures = {}
    for i in range(len(f)):
        # 1. mz: mean m/z of a feature = intensity-weighted average of m/z
        mz = np.sum(np.multiply(f[i]["mz"], f[i]["intensity"])) / np.sum(f[i]["intensity"])
        # 2. intensity: intensity of a feature (maximum intensity among the peaks constituting the feature)
        intensity = max(f[i]["intensity"])
        # 3. z: charge of the feature, set to 1 now, but modified later
        z = 1
        isotope = 0  # Will be used later
        # 4. RT: RT of the representative peak (i.e. the strongest peak) of a feature
        ind = np.argmax(f[i]["intensity"])
        rt = f[i]["rt"][ind]
        # 5. minRT and maxRT
        minRt = min(f[i]["rt"])
        maxRt = max(f[i]["rt"])
        # Conversion of RT to seconds
        if rt.unit_info == "minute":
            rt = rt * 60
            minRt = minRt * 60
            maxRt = maxRt * 60
        # 6. MS1 scan number of the representative peak of a feature
        ms1 = f[i]["num"][ind]
        # 7. minMS1 and maxMS1
        minMs1 = min(list(map(int, f[i]["num"])))
        maxMs1 = max(list(map(int, f[i]["num"])))
        # 8. SNratio (signal-to-noise ratio of the feature)
        if ms1 in noise:
            noiseLevel = noise[ms1]
        else:
            noiseLevel = 500
        snRatio = intensity / noiseLevel
        featureIntensityThreshold = noiseLevel * float(params["signal_noise_ratio"])
        if intensity >= featureIntensityThreshold:
            # 9. Percentage of true feature
            pctTF = (maxRt - minRt) / (gMaxRt - gMinRt) * 100
            # Organize features in a structured numpy array form
            if n == 0:
                features = np.array([(mz, intensity, z, rt, minRt, maxRt, ms1,
                                      minMs1, maxMs1, snRatio, pctTF, isotope)],
                                    dtype="f8, f8, f8, f8, f8, f8, f8, f8, f8, f8, f8, f8")
                n += 1
            else:
                features = np.append(features,
                                     np.array([(mz, intensity, z, rt, minRt, maxRt, ms1,
                                                minMs1, maxMs1, snRatio, pctTF, isotope)],
                                              dtype=features.dtype))
            for j in range(len(f[i]["num"])):
                num = f[i]["num"][j]
                if num not in ms1ToFeatures:
                    ms1ToFeatures[num] = {"mz": [f[i]["mz"][j]],
                                          "intensity": [f[i]["intensity"][j]]}
                else:
                    ms1ToFeatures[num]["mz"].append(f[i]["mz"][j])
                    ms1ToFeatures[num]["intensity"].append(f[i]["intensity"][j])
        else:
            continue
    features.dtype.names = ("mz", "intensity", "z", "RT", "minRT", "maxRT",
                            "MS1", "minMS1", "maxMS1", "SNratio",
                            "PercentageTF", "isotope")

    ##########################
    # Decharging of features #
    ##########################
    features = dechargeFeatures(features)

    ############################################
    # Convert the features to pandas dataframe #
    # Write features to a file                 #
    ############################################
    df = pd.DataFrame(features)
    df = df.drop(columns=["isotope"])  # "isotope" was internally used and need not be transferred

    # Create a subdirectory and save features to a file
    baseFilename = os.path.splitext(os.path.basename(filename))[0]  # i.e. filename without extension
    featureDirectory = os.path.join(os.getcwd(), baseFilename)
    if not os.path.exists(featureDirectory):
        os.mkdir(featureDirectory)

    # # Increment the number of a feature file
    # if len(glob.glob(os.path.join(featureDirectory, baseFilename + ".*.feature"))) == 0:
    #     featureFilename = os.path.splitext(os.path.basename(filename))[0] + ".1.feature"
    # else:
    #     oldNo = 0
    #     for f in glob.glob(os.path.join(featureDirectory, baseFilename + ".*.feature")):
    #         oldNo = max(oldNo, int(os.path.basename(f).split(".")[-2]))
    #     featureFilename = baseFilename + "." + str(int(oldNo) + 1) + ".feature"
    # featureFilename = os.path.join(featureDirectory, featureFilename)

    # Simply overwrite any existing feature file.
    # The individual feature file still needs to be located in an input file-specific
    # location since the feature file can be directly used later
    featureFilename = baseFilename + ".feature"
    featureFilename = os.path.join(featureDirectory, featureFilename)
    df.to_csv(featureFilename, index=False, sep="\t")

    return df  # Pandas DataFrame
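# The metabolomics snippets (detectFeatures and the PSM-parsing fragments) use
# a class-style utils.progressBar(total) with an .increment() method. A
# plausible minimal version is sketched below; the real utils module is not
# included in this collection, so treat this as an assumption.
import sys

class progressBar:
    def __init__(self, total):
        self.total = total
        self.count = 0
        self.shown = -1

    def increment(self):
        self.count += 1
        pct = int(100 * self.count / max(self.total, 1))
        if pct != self.shown:  # only redraw when the percentage changes
            self.shown = pct
            sys.stdout.write("\r  progress: {}%".format(pct))
            sys.stdout.flush()
        if self.count >= self.total:
            sys.stdout.write("\n")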
def searchLibrary(full, paramFile):
    ##################################
    # Load parameters and initialize #
    ##################################
    try:
        params = utils.getParams(paramFile)
    except:
        sys.exit("Parameter file cannot be found or cannot be loaded")

    condition = params["LC_column"].lower()
    if params["mode"] == "1":
        condition = condition + "p"
    elif params["mode"] == "-1":
        condition = condition + "n"
    else:
        sys.exit("'mode' parameter should be either 1 or -1")
    proton = 1.007276466812
    matchMzTol = float(params["library_mass_tolerance"])  # Unit of ppm
    adducts = adductDictionary(params)
    nFeatures = full.shape[0]
    # While full["feature_RT"] is in minutes, the library compounds have RTs in seconds,
    # so within this function full["feature_RT"] is converted to seconds
    full["feature_RT"] = full["feature_RT"] * 60

    ##########################
    # Perform library search #
    ##########################
    allRes = pd.DataFrame()
    nLibs = 1
    for libFile in params["library"]:
        doAlignment = int(params["library_rt_alignment"])
        print("  Library {} is being loaded".format(os.path.basename(libFile)))
        logging.info("  Library {} is being loaded".format(os.path.basename(libFile)))
        try:
            conn = sqlite3.connect(libFile)
        except:
            sys.exit("Library file cannot be found or cannot be loaded.")

        #####################################################
        # RT-alignment between features and library entries #
        #####################################################
        # Check whether the 'rt' column of the library holds numeric values or not
        hasNumericRt = 0
        cursor = conn.execute("PRAGMA table_info(library)")
        pragma = cursor.fetchall()
        for row in pragma:
            if row[1].lower() == "rt":
                if row[2].lower() == "real":
                    hasNumericRt = 1
                break

        # RT-alignment
        if doAlignment == 1:
            if hasNumericRt == 1:
                print("  RT-alignment is being performed between features and library compounds")
                logging.info("  RT-alignment is being performed between features and library compounds")
                x, y = prepRtAlignment(full, conn, params)
                mod = rtAlignment(x, y)
                if mod == -1:
                    print("  Since there are TOO FEW feature RTs comparable to library RTs, RT-alignment is skipped")
                    logging.info("  Since there are TOO FEW feature RTs comparable to library RTs, RT-alignment is skipped")
                    doAlignment = 0
                else:
                    # Calibration of features' RT
                    rPredict = ro.r("predict")
                    full["feature_calibrated_RT"] = None
                    full["feature_calibrated_RT"] = full["feature_RT"] - rPredict(
                        mod, FloatVector(full["feature_RT"]))
                    # Empirical CDF of alignment (absolute) residuals
                    # (used to calculate RT shift-based scores)
                    ecdfRt = ECDF(abs(np.array(mod.rx2("residuals"))))
            else:
                print("  Although the parameter is set to perform RT-alignment against the library, there are no valid RT values in the library")
                print("  Therefore, RT-alignment is not performed")
                logging.info("  Although the parameter is set to perform RT-alignment against the library, there are no valid RT values in the library")
                logging.info("  Therefore, RT-alignment is not performed")
                doAlignment = 0
        else:
            print("  According to the parameter, RT-alignment is not performed between features and library compounds")
            logging.info("  According to the parameter, RT-alignment is not performed between features and library compounds")

        ########################################
        # Match features and library compounds #
        ########################################
        print("  Features are being compared with library compounds")
        logging.info("  Features are being compared with library compounds")
        res = {"no": [], "feature_index": [], "feature_m/z": [],
               "feature_original_RT": [], "feature_aligned_RT": [], "id": [],
               "other_id": [], "formula": [], "name": [], "ion": [], "RT": [],
               "SMILES": [], "InchiKey": [], "collision_energy": [],
               "RT_shift": [], "RT_score": [], "MS2_score": [], "combined_score": []}
        intensityCols = [col for col in full.columns if col.lower().endswith("_intensity")]
        for c in intensityCols:
            res[c] = []
        n = 0
        progress = utils.progressBar(nFeatures)
        for i in range(nFeatures):
            progress.increment()
            # Feature information
            fZ = full["feature_z"].iloc[i]
            fSpec = full["MS2"].iloc[i]
            if np.isnan(fZ) or fSpec is None:
                # When the MS2 spectrum of the feature is not defined, skip it
                continue
            fMz = full["feature_m/z"].iloc[i]
            fRt = full["feature_RT"].iloc[i]
            fIntensity = full[intensityCols].iloc[i]
            if params["mode"] == "1":  # Positive mode
                fMass = fZ * (fMz - proton)
            elif params["mode"] == "-1":  # Negative mode
                fMass = fZ * (fMz + proton)
            # Retrieve library compounds whose neutral masses are similar to the feature mass
            df = queryLibrary(fMz, fMass, fZ, conn, adducts, matchMzTol)
            if not df.empty:
                colNameOtherId = df.filter(regex="other_ids").columns[0]
                for j in range(df.shape[0]):
                    # When there are library compound(s) matched to the feature,
                    # the MS2 of the library compound(s) should be retrieved
                    uid = df["id"].iloc[j]
                    uid = uid.replace("##Decoy_", "")
                    sqlQuery = r"SELECT * FROM {}".format(uid)
                    try:
                        libSpec = pd.read_sql_query(sqlQuery, conn)
                    except:
                        continue
                    if not libSpec.empty:
                        n += 1
                        # Calculate the score based on the MS2 spectrum
                        libSpec = libSpec.to_dict(orient="list")
                        simMs2 = calcMS2Similarity(fSpec, libSpec, params)
                        pMs2 = 1 - simMs2  # p-value-like score (the smaller, the better)
                        pMs2 = max(np.finfo(float).eps, pMs2)  # Prevent the underflow caused by 0
                        # Calculate the (similarity) score based on RT-shift
                        if doAlignment == 1:
                            fAlignedRt = full["feature_calibrated_RT"].iloc[i]
                            rtShift = fAlignedRt - df["rt"].iloc[j]
                            pRt = ecdfRt(abs(rtShift))  # Also a p-value-like score (the smaller, the better)
                            pRt = max(np.finfo(float).eps, pRt)
                            simRt = 1 - pRt
                            # p = 1 / (0.5 / pMs2 + 0.5 / pRt)  # Combined p-value using harmonic mean with equal weights
                            p = 1 - stats.chi2.cdf(-2 * (np.log(pMs2) + np.log(pRt)), 4)  # Fisher's method
                            # p = -2 * (np.log(pMs2) + np.log(pRt))  # Fisher's method used in the Perl pipeline (the smaller, the better)
                        else:
                            fAlignedRt = "NA"
                            if hasNumericRt == 1 and df["rt"].iloc[j] is not None:
                                rtShift = fRt - df["rt"].iloc[j]
                            else:
                                rtShift = "NA"
                            # pRt = 1
                            simRt = "NA"
                            p = pMs2

                        # Output
                        libId = df["id"].iloc[j]
                        libOtherId = df[colNameOtherId].iloc[j]
                        libFormula = df["formula"].iloc[j]
                        libName = df["name"].iloc[j]
                        if hasNumericRt == 1:
                            libRt = df["rt"].iloc[j]
                        else:
                            libRt = "NA"
                        libIon = df["ion_type"].iloc[j]
                        libSmiles = df["smiles"].iloc[j]
                        libInchiKey = df["inchikey"].iloc[j]
                        libEnergy = df["collision_energy"].iloc[j]
                        res["no"].append(n)
                        res["feature_index"].append(i + 1)
                        res["feature_m/z"].append(fMz)
                        res["feature_original_RT"].append(fRt / 60)  # For output, the unit of RT is minute
                        if doAlignment == 1:
                            res["feature_aligned_RT"].append(fAlignedRt / 60)
                        else:
                            res["feature_aligned_RT"].append(fAlignedRt)
                        for c in intensityCols:
                            res[c].append(fIntensity[c])
                        res["id"].append(libId)
                        res["other_id"].append(libOtherId)
                        res["formula"].append(libFormula)
                        res["name"].append(libName)
                        res["ion"].append(libIon)
                        if hasNumericRt == 1:
                            res["RT"].append(libRt / 60)
                        else:
                            res["RT"].append(libRt)
                        res["SMILES"].append(libSmiles)
                        res["InchiKey"].append(libInchiKey)
                        res["collision_energy"].append(libEnergy)
                        if rtShift != "NA":
                            rtShift = abs(rtShift) / 60  # Convert to minutes
                        res["RT_shift"].append(rtShift)
                        # Haiyan's preference
                        # RT_score and MS2_score: 0 ~ 1 (bad to good)
                        res["RT_score"].append(simRt)
                        res["MS2_score"].append(simMs2)
                        res["combined_score"].append(abs(-np.log10(p)))
        conn.close()
        res = pd.DataFrame.from_dict(res)
        resCols = ["no", "feature_index", "feature_m/z", "feature_original_RT",
                   "feature_aligned_RT"] + intensityCols + \
                  ["id", "other_id", "formula", "name", "ion", "RT", "SMILES",
                   "InchiKey", "collision_energy", "RT_shift", "RT_score",
                   "MS2_score", "combined_score"]
        res = res[resCols]
        res = res.rename(columns={"other_id": colNameOtherId})
        filePath = os.path.join(os.getcwd(), "align_" + params["output_name"])
        outputFile = os.path.join(filePath, "align_" + params["output_name"] +
                                  "." + str(nLibs) + ".library_matches")
        res.to_csv(outputFile, sep="\t", index=False)
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
        allRes = pd.concat([allRes, res], ignore_index=True)
        nLibs += 1

    # The RT unit of "full" needs to be converted back to minutes
    # for subsequent procedures (i.e. database search)
    full["feature_RT"] = full["feature_RT"] / 60
    return allRes
nmt.eval()
print('Model perplexity: ', perplexity(nmt, sourceTest, targetTest, batchSize))

if len(sys.argv) > 3 and sys.argv[1] == 'translate':
    (sourceWord2ind, targetWord2ind) = pickle.load(open(wordsDataFileName, 'rb'))
    sourceTest = utils.readCorpus(sys.argv[2])
    nmt = model.NMTmodel(embedding_size, hidden_size, targetWord2ind,
                         sourceWord2ind, startToken, padToken, unkToken,
                         endToken).to(device)
    nmt.load(modelFileName)
    nmt.eval()
    file = open(sys.argv[3], 'w')
    pb = utils.progressBar()
    pb.start(len(sourceTest))
    for s in sourceTest:
        file.write(' '.join(nmt.translateSentence(s)) + "\n")
        pb.tick()
    pb.stop()

if len(sys.argv) > 3 and sys.argv[1] == 'bleu':
    ref = [[s] for s in utils.readCorpus(sys.argv[2])]
    hyp = utils.readCorpus(sys.argv[3])
    bleu_score = corpus_bleu(ref, hyp)
    print('Corpus BLEU: ', (bleu_score * 100))
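# The NMT script above uses yet another interface: pb = utils.progressBar();
# pb.start(total); pb.tick(); pb.stop(). A minimal sketch of that variant,
# matching only the calls visible above (hypothetical, not the course's
# actual utils module):
import sys

class progressBar:
    def start(self, total):
        self.total = total
        self.count = 0

    def tick(self):
        self.count += 1
        sys.stdout.write("\r{}/{}".format(self.count, self.total))
        sys.stdout.flush()

    def stop(self):
        sys.stdout.write("\n")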
def ms2ForFeatures(full, mzxmlFiles, paramFile):
    print("  Identification of MS2 spectra for the features")
    print("  ==============================================")
    logging.info("  Identification of MS2 spectra for the features")
    logging.info("  ==============================================")
    full = full.to_records(index=False)  # Change pd.DataFrame to np.RecArray for internal computation (speed issue)

    ######################################
    # Load parameters and initialization #
    ######################################
    params = utils.getParams(paramFile)
    # ppiThreshold = "max"  # Hard-coded
    ppiThreshold = params["ppi_threshold_of_features"]
    pctTfThreshold = float(params["max_percentage_RT_range"])
    tolIsolation = float(params["isolation_window"])
    tolPrecursor = float(params["tol_precursor"])
    tolIntraMS2Consolidation = float(params["tol_intra_ms2_consolidation"])
    tolInterMS2Consolidation = float(params["tol_inter_ms2_consolidation"])
    nFeatures = len(full)
    nFiles = len(mzxmlFiles)
    featureToScan = np.empty((nFeatures, nFiles), dtype=object)
    featureToSpec = np.empty((nFeatures, nFiles), dtype=object)

    #################################################
    # Assignment of MS2 spectra to features         #
    # Consolidation of MS2 spectra for each feature #
    #################################################
    m = -1  # Index for input files
    for file in mzxmlFiles:
        m += 1
        reader = mzxml.MzXML(file)
        fileBasename, _ = os.path.splitext(os.path.basename(file))
        colNames = [item for item in full.dtype.names
                    if item.startswith(fileBasename + "_")]
        subset = full[colNames]
        subset.dtype.names = [s.split("_")[-1] for s in subset.dtype.names]
        ms2Dict = {}
        minScan, maxScan = int(np.nanmin(subset["minMS1"])), int(np.nanmax(subset["maxMS1"]))
        progress = utils.progressBar(maxScan - minScan + 1)
        print("  %s is being processed" % os.path.basename(file))
        print("  Looking for MS2 scan(s) responsible for each feature")
        logging.info("  %s is being processed" % os.path.basename(file))
        logging.info("  Looking for MS2 scan(s) responsible for each feature")
        for i in range(minScan, maxScan + 1):
            progress.increment()
            spec = reader[str(i)]
            msLevel = spec["msLevel"]
            if msLevel == 1:
                surveyNum = i
            elif msLevel == 2:
                # Find MS2 scans which satisfy the following conditions.
                # From the discussion around June 2020:
                # 1. In ReAdW-derived mzXML files, precursor m/z values are in two tags: "precursorMz" and "filterLine"
                # 2. Through Haiyan's manual inspection, the real precursor m/z value is closer to the one in the "filterLine" tag
                # 3. So, in this script, the precursor m/z of an MS2 scan is obtained from the "filterLine" tag
                # 4. Note that it may be specific to ReAdW-derived mzXML files, since MSConvert-derived mzXML files do not have a "filterLine" tag
                # 4.1. In this case, maybe the use of mzML (instead of mzXML) would be a solution (to-do later)
                # precMz = spec["precursorMz"][0]["precursorMz"]  # Precursor m/z from "precursorMz" tag
                p = re.search("([0-9.]+)\\@", spec["filterLine"])
                precMz = float(p.group(1))
                survey = reader[str(surveyNum)]
                fInd = np.where((surveyNum >= subset["minMS1"]) &
                                (surveyNum <= subset["maxMS1"]) &
                                (subset["mz"] >= (precMz - tolIsolation)) &
                                (subset["mz"] <= (precMz + tolIsolation)) &
                                (subset["PercentageTF"] <= pctTfThreshold))[0]
                if len(fInd) > 0:
                    ppi = []
                    for j in range(len(fInd)):  # "j" avoids clobbering the scan index "i"
                        mz = subset["mz"][fInd[j]]
                        lL = mz - mz * tolPrecursor / 1e6
                        uL = mz + mz * tolPrecursor / 1e6
                        ind = np.where((survey["m/z array"] >= lL) &
                                       (survey["m/z array"] <= uL))[0]
                        if len(ind) > 0:
                            ppi.append(np.max(survey["intensity array"][ind]))
                        else:
                            ppi.append(0)
                    if sum(ppi) == 0:
                        continue
                    ppi = ppi / np.sum(ppi) * 100  # Convert intensities to percentage values
                    if ppiThreshold == "max":
                        fInd = np.array([fInd[np.argmax(ppi)]])
                    else:
                        # ppiThreshold should be a numeric value
                        ppiThreshold = float(ppiThreshold)
                        fInd = fInd[np.where(ppi > ppiThreshold)]
                    if len(fInd) == 0:  # Last check of candidate feature indexes
                        continue
                    else:
                        # Add this MS2 scan information to ms2Dict
                        ms2Dict[spec["num"]] = {}
                        ms2Dict[spec["num"]]["mz"] = spec["m/z array"]
                        ms2Dict[spec["num"]]["intensity"] = spec["intensity array"]
                        # Mapping between features and MS2 scan numbers
                        for j in range(len(fInd)):
                            if featureToScan[fInd[j], m] is None:
                                featureToScan[fInd[j], m] = spec["num"]
                            else:
                                featureToScan[fInd[j], m] += ";" + spec["num"]

        print("  Merging MS2 spectra for each feature within a run (it may take a while)")
        logging.info("  Merging MS2 spectra for each feature within a run (it may take a while)")
        progress = utils.progressBar(nFeatures)
        for i in range(nFeatures):
            progress.increment()
            if featureToScan[i, m] is not None:
                spec = intraConsolidation(ms2Dict, featureToScan[i, m],
                                          tolIntraMS2Consolidation)
                featureToSpec[i, m] = spec

    print("  Merging MS2 spectra for each feature between runs when there are multiple runs")
    print("  Simplification of the MS2 spectrum for each feature by retaining the strongest 100 peaks")
    logging.info("  Merging MS2 spectra for each feature between runs when there are multiple runs")
    logging.info("  Simplification of the MS2 spectrum for each feature by retaining the strongest 100 peaks")
    specArray = np.array([])
    progress = utils.progressBar(nFeatures)
    for i in range(nFeatures):
        progress.increment()
        if np.sum(featureToSpec[i] == None) == nFiles:
            specArray = np.append(specArray, None)
        else:
            spec = interConsolidation(featureToSpec[i, :], tolInterMS2Consolidation)
            specArray = np.append(specArray, spec)

    ###############################
    # MS2 processing for features #
    ###############################
    # "specArray" is the list of (consolidated) MS2 spectra:
    # specArray[i] is the MS2 spectrum corresponding to the i-th feature;
    # if there's no MS2 spectrum, then specArray[i] is None
    df = utils.summarizeFeatures(full, params)
    # Add the mean m/z of the feature and its charge state to the beginning
    # of the MS2 spectrum (similar to a .dta file)
    for i in range(nFeatures):
        if specArray[i] is not None:
            specArray[i]["mz"] = np.insert(specArray[i]["mz"], 0,
                                           df["feature_m/z"].iloc[i])
            specArray[i]["intensity"] = np.insert(specArray[i]["intensity"], 0,
                                                  df["feature_z"].iloc[i])
    df["MS2"] = specArray
    df = df.sort_values(by="feature_m/z", ignore_index=True)  # Features are sorted by "feature_m/z"
    df.insert(loc=0, column="feature_num", value=df.index + 1)
    # df["feature_num"] = df.index + 1  # Update "feature_num" according to the ascending order of "feature_m/z" (as sorted)

    # Write MS2 spectra to files
    filePath = os.path.join(os.getcwd(), "align_" + params["output_name"])
    ms2Path = os.path.join(filePath, "MS2")
    if not os.path.exists(ms2Path):
        os.mkdir(ms2Path)
    for i in range(df.shape[0]):
        if df["MS2"].iloc[i] is not None:
            fileName = os.path.join(ms2Path, "f" + str(i + 1) + ".MS2")
            dfMS2 = pd.DataFrame.from_dict(df["MS2"].iloc[i])
            dfMS2.to_csv(fileName, index=False, header=False, sep="\t")

    # Save fully-aligned features with their MS2 spectra (i.e. res) for debugging purposes.
    # When the pipeline gets mature, this part needs to be removed
    pickle.dump(df, open(os.path.join(filePath, ".fully_aligned_feature.pickle"),
                         "wb"))  # Leading dot makes the file hidden

    ##########################
    # Handling mzXML file(s) #
    ##########################
    # Move mzXML files to the directory(ies) where the individual .feature files are located
    if params["skip_feature_detection"] == "0":
        for file in mzxmlFiles:
            baseFilename = os.path.basename(file)
            featureDirectory = os.path.join(os.getcwd(),
                                            os.path.splitext(baseFilename)[0])
            os.rename(file, os.path.join(featureDirectory, baseFilename))

    return df, featureToScan
def buildG4BLfield(magDict, gridDict, saveAs=None, FBonly=False, coil=True):
    """Builds a magnetic field map of SSU/SSD and writes it to a .table file
    in g4blgrid format.

    Args:
        magDict (dict): Dictionary containing magnet, coil currents and custom
            fitDict paths. If fitDict paths are not specified it pulls the
            default ones.
        gridDict (dict): Dictionary describing the grid over which to
            calculate the field.
        saveAs (str): Name that the user wishes to give the output field file
            (no need to supply the full path). If None (the default), the
            magnet name + today's date is used.
        FBonly (bool): When True, calculate only the FB terms. When False,
            calculate geofit+FB terms, i.e. the full model field is output.
        coil (bool): When True, the full field is calculated from the coil fit
            model. If False, the geometrical fit model is used instead.

    Returns:
        Nothing. The output field is saved at data/MAUS/saveAs.table.

    Todo:
        * The scaleList part could change? May need support so that it can be
          adjusted by the user.
    """
    print('Calculating field map for magnet:', magDict['magnet'])
    print('With currents:')
    print('\n\t M1 -> %.2f A\n\t M2 -> %.2f A\n\t ECE -> %.2f A\n'
          % (magDict['M1']['I'], magDict['M2']['I'], magDict['CC']['I']))
    if not FBonly and coil:
        coilfit_calc = get_coilfit_class(magDict)
    print('This could take a while...')
    if saveAs is None:
        _date = time.localtime()
        saveAs = '%s_%s%02d%02d.table' % (magDict['magnet'], _date.tm_year,
                                          _date.tm_mon, _date.tm_mday)

    xNsteps = int((gridDict['x']['end'] + gridDict['x']['step']) / gridDict['x']['step'])
    xARR = np.linspace(gridDict['x']['start'], gridDict['x']['end'], xNsteps)
    yNsteps = int((gridDict['y']['end'] + gridDict['y']['step']) / gridDict['y']['step'])
    yARR = np.linspace(gridDict['y']['start'], gridDict['y']['end'], yNsteps)
    zNsteps = int((gridDict['z']['end'] + gridDict['z']['step']) / gridDict['z']['step'])
    zARR = np.linspace(gridDict['z']['start'], gridDict['z']['end'], zNsteps)

    scaleList = [' 1 X [1e3]\n', ' 2 Y [1e3]\n', ' 3 Z [1e3]\n',
                 ' 4 BX [1e-3]\n', ' 5 BY [1e-3]\n', ' 6 BZ [1e-3]\n', ' 0\n']
    print('Writing out %d field points' % (xNsteps * yNsteps * zNsteps))
    count = 1
    start_time = time.time()
    with open(os.path.join(utils.maus_field_path, saveAs), 'w') as _output:
        _output.write('\t%d\t%d\t%d\t1\n' % (xNsteps, yNsteps, zNsteps))
        for i in scaleList:
            _output.write(i)
        for _x in xARR:
            for _y in yARR:
                for _z in zARR:
                    if FBonly:
                        Bx, By, Bz = appFB.applyFB_grid(magDict, _x, _y, _z, 0, 0, 0)
                    else:
                        _Bx, _By, _Bz = coilfit_calc.calc_full_field_at_point_xyz(_x, _y, _z)
                        Bx, By, Bz = appFB.applyFB_grid(magDict, _x, _y, _z,
                                                        _Bx, _By, _Bz)
                    _output.write('{:.3f}\t{:.3f}\t{:.3f}\t{:.8f}\t{:.8f}\t{:.8f}\n'.format(
                        _x, _y, _z, Bx, By, Bz))
                    utils.progressBar(count, xNsteps * yNsteps * zNsteps,
                                      start_time, time.time())
                    count += 1
    print('Finished! File can be found at %s' % os.path.join(utils.maus_field_path, saveAs))
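# Hypothetical usage of buildG4BLfield, inferred only from the keys the
# function reads (magDict['magnet'], magDict[<coil>]['I'],
# gridDict[<axis>]['start'/'end'/'step']). The numeric values below are
# placeholders for illustration; real configurations in the original project
# may carry more fields (e.g. fitDict paths).
magDict = {'magnet': 'SSU',
           'M1': {'I': 205.0}, 'M2': {'I': 180.0}, 'CC': {'I': 210.0}}
gridDict = {ax: {'start': -200.0, 'end': 200.0, 'step': 10.0} for ax in 'xy'}
gridDict['z'] = {'start': 0.0, 'end': 3000.0, 'step': 10.0}
# FBonly=True keeps the example self-contained (no coil-fit model needed)
buildG4BLfield(magDict, gridDict, saveAs='SSU_test.table', FBonly=True)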
output_data = []
print('Fitting functions to clusters...')
for i, cluster in enumerate(extracted_point_clusters):
    cluster.show_object_fit = show_object_fit
    cluster.show_object_fit_separate = show_object_fit_separate
    cluster.add_header_data(headers)
    cluster.add_background_data(background)
    try:
        params = cluster.fit_curve(function=fit_function, square_size=square_size)
    except Exception:
        continue  # suppress all exceptions; incorrect fits are discarded
    finally:
        if not show_object_fit and not show_object_fit_separate:
            progressBar(i, len(extracted_point_clusters) - 1)
    if cluster.correct_fit:
        output_data.append(cluster.output_data())

result = ""
result += '-' * 150 + '\n'
result += '{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}'.format(
    "x", "y", "flux", "fwhm_x|fwhm_y", "peak_SNR", "fit_rms",
    "skew_x|skew_y", "kurt_x|kurt_y") + '\n'
result += '-' * 150 + '\n'
for i, data in enumerate(output_data):
    result += '{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}{:<15}'.format(
        data[0], data[1], data[2], data[3], data[4],
        data[5], data[6], data[7]) + '\n'
def main(args):
    dataset_name = args.dataset
    model_name = args.model
    n_inner_iter = args.adaptation_steps
    batch_size = args.batch_size
    save_model_file = args.save_model_file
    load_model_file = args.load_model_file
    lower_trial = args.lower_trial
    upper_trial = args.upper_trial
    is_test = args.is_test
    stopping_patience = args.stopping_patience
    epochs = args.epochs
    fast_lr = args.learning_rate
    slow_lr = args.meta_learning_rate
    noise_level = args.noise_level
    noise_type = args.noise_type
    resume = args.resume

    first_order = False
    inner_loop_grad_clip = 20
    task_size = 50
    output_dim = 1
    horizon = 10

    # window size, task size and input dimension per dataset
    meta_info = {"POLLUTION": [5, 50, 14],
                 "HR": [32, 50, 13],
                 "BATTERY": [20, 50, 3]}

    assert model_name in ("FCN", "LSTM"), "Model was not correctly specified"
    assert dataset_name in ("POLLUTION", "HR", "BATTERY")

    window_size, task_size, input_dim = meta_info[dataset_name]
    grid = [0., noise_level]
    output_directory = "output/"

    train_data_ML = pickle.load(
        open("../../Data/TRAIN-" + dataset_name + "-W" + str(window_size) +
             "-T" + str(task_size) + "-ML.pickle", "rb"))
    validation_data_ML = pickle.load(
        open("../../Data/VAL-" + dataset_name + "-W" + str(window_size) +
             "-T" + str(task_size) + "-ML.pickle", "rb"))
    test_data_ML = pickle.load(
        open("../../Data/TEST-" + dataset_name + "-W" + str(window_size) +
             "-T" + str(task_size) + "-ML.pickle", "rb"))

    for trial in range(lower_trial, upper_trial):
        output_directory = ("../../Models/" + dataset_name + "_" + model_name +
                            "_MAML/" + str(trial) + "/")
        save_model_file_ = output_directory + save_model_file
        save_model_file_encoder = output_directory + "encoder_" + save_model_file
        load_model_file_ = output_directory + load_model_file

        try:
            os.mkdir(output_directory)
        except OSError as error:
            print(error)

        with open(output_directory + "/results2.txt", "a+") as f:
            f.write("Learning rate :%f \n" % fast_lr)
            f.write("Meta-learning rate: %f \n" % slow_lr)
            f.write("Adaptation steps: %f \n" % n_inner_iter)
            f.write("\n")

        if model_name == "LSTM":
            model = LSTMModel(batch_size=batch_size, seq_len=window_size,
                              input_dim=input_dim, n_layers=2, hidden_dim=120,
                              output_dim=output_dim)

        model2 = LinearModel(120, 1)
        optimizer = torch.optim.Adam(list(model.parameters()) + list(model2.parameters()),
                                     lr=slow_lr)
        loss_func = mae
        # loss_func = nn.SmoothL1Loss()
        # torch.backends.cudnn.enabled = False

        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        meta_learner = MetaLearner(model2, optimizer, fast_lr, loss_func,
                                   first_order, n_inner_iter,
                                   inner_loop_grad_clip, device)
        model.to(device)

        early_stopping = EarlyStopping(patience=stopping_patience,
                                       model_file=save_model_file_encoder,
                                       verbose=True)
        early_stopping2 = EarlyStopping(patience=stopping_patience,
                                        model_file=save_model_file_,
                                        verbose=True)

        if resume:
            model.load_state_dict(torch.load(save_model_file_encoder))
            model2.load_state_dict(torch.load(save_model_file_)["model_state_dict"])
            val_error = test(validation_data_ML, meta_learner, model, device)
            early_stopping(val_error, model)
            early_stopping2(val_error, meta_learner)

        total_tasks, task_size, window_size, input_dim = train_data_ML.x.shape
        accum_mean = 0.0

        for epoch in range(epochs):
            model.zero_grad()
            meta_learner._model.zero_grad()

            # train
            # batch_idx = np.random.randint(0, total_tasks - 1, batch_size)
            for batch_idx in range(0, total_tasks - 1, batch_size):
                x_spt, y_spt = train_data_ML[batch_idx:batch_idx + batch_size]
                x_qry, y_qry = train_data_ML[batch_idx + 1:batch_idx + 1 + batch_size]
                x_spt, y_spt = to_torch(x_spt), to_torch(y_spt)
                x_qry = to_torch(x_qry)
                y_qry = to_torch(y_qry)

                # data augmentation: perturb targets with additive or multiplicative noise
                epsilon = grid[np.random.randint(0, len(grid))]
                if noise_type == "additive":
                    y_spt = y_spt + epsilon
                    y_qry = y_qry + epsilon
                else:
                    y_spt = y_spt * (1 + epsilon)
                    y_qry = y_qry * (1 + epsilon)

                train_tasks = [Task(model.encoder(x_spt[i]), y_spt[i])
                               for i in range(x_spt.shape[0])]
                val_tasks = [Task(model.encoder(x_qry[i]), y_qry[i])
                             for i in range(x_qry.shape[0])]

                adapted_params = meta_learner.adapt(train_tasks)
                mean_loss = meta_learner.step(adapted_params, val_tasks,
                                              is_training=True)
                accum_mean += mean_loss.cpu().detach().numpy()

                progressBar(batch_idx, total_tasks, 100)

            print(accum_mean / (batch_idx + 1))

            # test
            val_error = test(validation_data_ML, meta_learner, model, device)
            test_error = test(test_data_ML, meta_learner, model, device)
            print("Epoch:", epoch)
            print("Val error:", val_error)
            print("Test error:", test_error)

            early_stopping(val_error, model)
            early_stopping2(val_error, meta_learner)
            if early_stopping.early_stop:
                print("Early stopping")
                break

        model.load_state_dict(torch.load(save_model_file_encoder))
        model2.load_state_dict(torch.load(save_model_file_)["model_state_dict"])
        meta_learner = MetaLearner(model2, optimizer, fast_lr, loss_func,
                                   first_order, n_inner_iter,
                                   inner_loop_grad_clip, device)

        validation_error = test(validation_data_ML, meta_learner, model, device)
        test_error = test(test_data_ML, meta_learner, model, device)
        validation_error_h1 = test(validation_data_ML, meta_learner, model,
                                   device, horizon=1)
        test_error_h1 = test(test_data_ML, meta_learner, model, device, horizon=1)

        model.load_state_dict(torch.load(save_model_file_encoder))
        model2.load_state_dict(torch.load(save_model_file_)["model_state_dict"])
        meta_learner2 = MetaLearner(model2, optimizer, fast_lr, loss_func,
                                    first_order, 0, inner_loop_grad_clip, device)
        validation_error_h0 = test(validation_data_ML, meta_learner2, model,
                                   device, horizon=1)
        test_error_h0 = test(test_data_ML, meta_learner2, model, device, horizon=1)

        model.load_state_dict(torch.load(save_model_file_encoder))
        model2.load_state_dict(torch.load(save_model_file_)["model_state_dict"])
        meta_learner2 = MetaLearner(model2, optimizer, fast_lr, loss_func,
                                    first_order, n_inner_iter,
                                    inner_loop_grad_clip, device)
        validation_error_mae = test(validation_data_ML, meta_learner2, model, device)
        test_error_mae = test(test_data_ML, meta_learner2, model, device)
        print("test_error_mae", test_error_mae)

        with open(output_directory + "/results2.txt", "a+") as f:
            f.write("Test error: %f \n" % test_error)
            f.write("Validation error: %f \n" % validation_error)
            f.write("Test error h1: %f \n" % test_error_h1)
            f.write("Validation error h1: %f \n" % validation_error_h1)
            f.write("Test error h0: %f \n" % test_error_h0)
            f.write("Validation error h0: %f \n" % validation_error_h0)
            f.write("Test error mae: %f \n" % test_error_mae)
            f.write("Validation error mae: %f \n" % validation_error_mae)

        print(test_error)
        print(validation_error)
def train(self, num_trials):
    for trial in range(num_trials):
        mr = self._update_weights()
        progressBar(trial, num_trials, info="Mean returns: {}".format(mr))
    progressBar(num_trials, num_trials)
    print("Done")
psms = pd.read_csv(idTxt, skiprows=1,
                   sep=";")  # Note that the ID.txt file is delimited by semicolons
psms = psms[["Peptide", "Outfile", "measuredMH", "XCorr"]]
psms = psms.loc[psms["Outfile"].str.contains(
    mzXMLBaseName)]  # Extract PSMs from FTLD_Batch2_F50.mzXML
psms["charge"] = [outfile.split("/")[-1].split(".")[-2]
                  for outfile in psms["Outfile"]]
psms = psms.drop_duplicates()
print("  PSM information has been parsed\n")

# The unique key is the peptide-charge pair
keys = psms["Peptide"] + "_" + psms["charge"]
keys = list(set(keys))
res = []
progress = utils.progressBar(len(keys))
for key in keys:
    progress.increment()
    pep, z = key.split("_")
    rtArray = np.array([])
    intArray = np.array([])
    for _, psm in psms[(psms["Peptide"] == pep) & (psms["charge"] == z)].iterrows():
        [_, psmScanNum, _, _, _] = os.path.basename(psm["Outfile"]).split(".")
        psmScanNum = int(psmScanNum)
        surveyScanNum = ms2ToSurvey[psmScanNum]
        _, precIntensity, precRt = getPrecursorPeak(reader, int(psmScanNum),
                                                    surveyScanNum, params)
        rtArray = np.append(rtArray, precRt)
        intArray = np.append(intArray, precIntensity)
    # Intensity-weighted average RT, converted from seconds to minutes
    rt = sum(rtArray * intArray) / sum(intArray) / 60