def doench_predictions():
    """Score guide sequences from 'just_guides.txt' with the Doench model.

    Reads one guide per line: a '+' marks a forward-strand guide (the text
    before the '+' is scored as-is), a '-' marks a reverse-strand guide (the
    text before the '-' is reverse-complemented first).  Writes one score per
    line to 'doench_scores.txt', replacing any previous contents.
    """
    # First pass: count scoreable lines so the input array can be pre-sized.
    with open('just_guides.txt') as counter:
        num_lines = sum(1 for line in counter if "+" in line or "-" in line)

    # Renamed from `set`, which shadowed the builtin.
    guides = np.empty(num_lines, dtype='object')
    with open('just_guides.txt') as fp:
        cnt = 0
        line = fp.readline()
        while not line.isspace():
            # Stop at the first line with no strand marker; this also handles
            # EOF, where readline() returns '' (which has neither marker).
            if "+" not in line and "-" not in line:
                break
            print(line)
            if "+" in line:
                line = line[:line.find("+")]
            if "-" in line:
                line = str(Seq(line[:line.find("-")]).reverse_complement())
            guides[cnt] = line
            line = fp.readline()
            cnt += 1

    predictions = model_comparison.predict(guides, None, None)
    # Mode 'w' truncates on open, replacing the original 'a+' + truncate(0)
    # idiom; the context manager guarantees the handle is closed (the
    # original leaked it).
    with open("doench_scores.txt", 'w') as f_new:
        for prediction in predictions:
            f_new.write(str(prediction) + '\n')
def calcFusiDoench(seqs):
    """Score 30mer sequences with the Fusi/Doench model.

    Input is a 30mer: 4bp 5', 20bp guide, 3bp PAM, 3bp 3', based on source
    code sent by John Doench.  A slightly modified code is now called
    'Azimuth', see calcAziScore.

    Returns a list of integer scores in 0-100; -1 for sequences containing N.
    """
    import model_comparison

    aa_cut = 0
    per_peptide = 0
    # Pickled sklearn model: must be opened in binary mode ('rb'); the
    # original text-mode open breaks pickle.load on Python 3.  The context
    # manager also closes the handle, which the original leaked.
    # if this fails, install sklearn like this: pip install scikit-learn==0.16.1
    with open(join(fusiDir, 'saved_models/V3_model_nopos.pickle'), 'rb') as f:
        model = pickle.load(f)

    res = []
    for seq in seqs:
        if "N" in seq:
            res.append(-1)  # can't do Ns
            continue
        pam = seq[25:27]
        if pam != "GG":
            # Model expects an NGG PAM: force the GG rather than skipping
            # the sequence (deliberate choice inherited from the original).
            seq = seq[:25] + "GG" + seq[27:]
        score = model_comparison.predict(seq, aa_cut, per_peptide, model=model)
        res.append(int(round(100 * score)))
    return res
def on_target_scoring(
    spacers: pd.DataFrame,
    rule_set: Optional[str] = None,
    on_target_score_threshold: float = 0.0,
) -> pd.DataFrame:
    """Score spacer on-target activity and filter by a score threshold.

    Parameters
    ----------
    spacers : :class:`~pandas.DataFrame`
        Must contain a ``"spacer"`` column of sequences; an
        ``"on_target_score"`` column is added in place.
    rule_set : `str`, optional
        ``None`` or ``"none"``: every spacer scores 100 (no filtering effect
        unless the threshold exceeds 100).  ``"1"``: multiprocess scoring via
        ``score_entry``/``calc_score``.  ``"azimuth"``: Azimuth ``predict``,
        scaled to 0-100.  Any other string raises ``ValueError``.
    on_target_score_threshold : `float`
        Keep only spacers scoring strictly above this value.

    Return
    ------
    :class:`~pandas.DataFrame`
        The scored and filtered spacers.
    """
    if rule_set is None:
        spacers["on_target_score"] = (
            np.ones(shape=spacers["spacer"].values.shape, dtype=np.uint8) * 100
        )
    elif isinstance(rule_set, str):
        if rule_set == "1":
            spacerlist = spacers["spacer"].tolist()
            initialnumber = len(spacers)
            print(f"Found {initialnumber} potential spacers. Now scoring")
            sublist = []
            queue = Manager().Queue()
            # Context manager closes the pool when scoring finishes;
            # the original never closed/joined it and leaked workers.
            with Pool() as pool:
                func = partial(
                    score_entry,
                    method=calc_score,
                    place=queue,
                    cutoff=on_target_score_threshold,
                )
                mapObj = pool.map_async(func, spacerlist, callback=sublist.append)
                # Drain the progress queue until the pool finishes its task.
                while not mapObj.ready():
                    # Acknowledge the per-spacer progress reports posted by
                    # the workers since we last looked.
                    for _ in range(queue.qsize()):
                        queue.task_done()
                mapObj.wait()
            spacerscores = np.asarray([x for x in sublist[0]])
            spacers["on_target_score"] = spacerscores
        elif rule_set.lower() == "azimuth":
            spacers["on_target_score"] = predict(spacers["spacer"].values) * 100
        elif rule_set.lower() == "none":
            spacers["on_target_score"] = (
                np.ones(shape=spacers["spacer"].values.shape, dtype=np.uint8) * 100
            )
        else:
            # Fail fast: the original fell through silently and crashed
            # later with an opaque KeyError at the filter below.
            raise ValueError(f"Unknown rule set: {rule_set!r}")
    spacers = spacers[spacers["on_target_score"] > on_target_score_threshold]
    return spacers
import argparse
import contextlib
import io
import sys

import numpy as np
from azimuth.model_comparison import predict


@contextlib.contextmanager
def redirect_stdout(target):
    """Temporarily route sys.stdout to *target*, restoring it even on error.

    NOTE(review): stdlib contextlib.redirect_stdout provides the same
    behavior; this local definition is kept to preserve the module's
    public name.
    """
    original = sys.stdout
    try:
        sys.stdout = target
        yield
    finally:
        sys.stdout = original


# Lightweight CLI wrapper for running model predictions on sequence data
if __name__ == '__main__':
    # Fix: sys, argparse, and io were used below without being imported.
    parser = argparse.ArgumentParser()
    parser.add_argument('--sequences', dest='sequences', action='store',
                        type=str, nargs='+', required=True)
    options = parser.parse_args()

    # Capture and suppress print output from model, so that we can cleanly
    # return results on stdout
    f = io.StringIO()
    with redirect_stdout(f):
        # Run model
        predictions = predict(np.array(options.sequences), None, None)

    # Print predictions to stdout for further use
    for seq, pred in zip(options.sequences, predictions):
        print("{} -> {}".format(seq, pred))
def predict_moreno(seq):
    """Run the predictor on *seq* with the model loaded from
    'moreno_model.pkl' (presumably Moreno-specific weights — confirm
    with the model's provenance)."""
    options = {
        "aa_cut": None,
        "percent_peptide": None,
        "model_file": "moreno_model.pkl",
    }
    return predict(seq, **options)