Example #1
0
def doench_predictions():
    """Score guide sequences from 'just_guides.txt' with the Doench model.

    Reads lines from 'just_guides.txt' until a blank line or a line without
    a strand marker ('+'/'-') is reached. For '+' lines the guide is the text
    before the marker; for '-' lines it is the reverse complement of the text
    before the marker (via Bio.Seq.Seq). Predictions are written one-per-line
    to 'doench_scores.txt', overwriting any previous contents.
    """
    guides = []  # renamed from 'set', which shadowed the builtin
    with open('just_guides.txt') as fp:
        for line in fp:
            # Stop at the first blank line or a line lacking a strand marker.
            if line.isspace() or ("+" not in line and "-" not in line):
                break
            print(line)
            if "+" in line:
                line = line[:line.find("+")]
            if "-" in line:
                line = str(Seq(line[:line.find("-")]).reverse_complement())
            guides.append(line)

    # Build the array from the collected guides only; the original pre-sized
    # np.empty(num_lines) could leave trailing None entries if the loop broke
    # before filling every slot.
    predictions = model_comparison.predict(
        np.asarray(guides, dtype='object'), None, None)

    # 'w' truncates on open, replacing the original open('a+') + truncate(0);
    # 'with' guarantees the handle is closed (the original leaked it).
    with open("doench_scores.txt", 'w') as f_new:
        for prediction in predictions:
            f_new.write(str(prediction) + '\n')
Example #2
0
def calcFusiDoench(seqs):
    """
    Input is a 30mer: 4bp 5', 20bp guide, 3bp PAM, 3bp 5'
    based on source code sent by John Doench.
    A slightly modified code is now called 'Azimuth', see calcAziScore

    Returns a list of int scores in [0, 100]; -1 for sequences containing 'N'.
    """
    import model_comparison
    aa_cut = 0
    per_peptide = 0
    # Binary mode is required for pickle under Python 3; 'with' closes the
    # handle (the original opened in text mode and never closed it).
    # If loading fails, install sklearn like this: pip install scikit-learn==0.16.1
    with open(join(fusiDir, 'saved_models/V3_model_nopos.pickle'), 'rb') as f:
        model = pickle.load(f)
    res = []
    for seq in seqs:
        if "N" in seq:
            res.append(-1)  # can't do Ns
            continue

        # Force the PAM (positions 25-26 of the 30mer) to "GG" instead of
        # skipping non-NGG sequences.
        pam = seq[25:27]
        if pam != "GG":
            seq = seq[:25] + "GG" + seq[27:]
        score = model_comparison.predict(seq, aa_cut, per_peptide, model=model)
        res.append(int(round(100 * score)))
    return res
def on_target_scoring(
    spacers: pd.DataFrame,
    rule_set: Optional[str] = None,
    on_target_score_threshold: float = 0.0,
) -> pd.DataFrame:
    """Add an "on_target_score" column to *spacers* and filter by threshold.

    Parameters
    ----------
    spacers : :class:`~pandas.DataFrame`
        Must contain a "spacer" column of sequences.
    rule_set : `str`
        Scoring method: None or "none" assigns a constant score of 100
        (no real scoring); "1" scores via a multiprocessing pool calling
        ``score_entry``/``calc_score``; "azimuth" uses the Azimuth
        ``predict`` function scaled to 0-100. Any other string leaves the
        column unset (the final filter would then raise a KeyError).
    on_target_score_threshold : `float`
        Rows scoring at or below this value are dropped.

    Return
    ------
    :class:`~pandas.DataFrame`
        The scored rows with on_target_score > on_target_score_threshold.
    """
    if rule_set is None:
        # No rule set: give every spacer the maximal score so nothing is
        # filtered out by the threshold below.
        spacers["on_target_score"] = (
            np.ones(shape=spacers["spacer"].values.shape, dtype=np.uint8) *
            100)
    elif isinstance(rule_set, str):
        if rule_set == "1":
            spacerlist = spacers["spacer"].tolist()
            initialnumber = len(spacers)
            print(f"Found {initialnumber} potential spacers.  Now scoring")
            sublist = []
            # Worker processes report progress through this managed queue.
            queue = Manager().Queue()
            pool = Pool()
            func = partial(
                score_entry,
                method=calc_score,
                place=queue,
                cutoff=on_target_score_threshold,
            )
            # callback=sublist.append stores the full result list as
            # sublist[0] once the async map completes.
            mapObj = pool.map_async(func, spacerlist, callback=sublist.append)
            # Initialize progress
            # While the pool has not finished its task
            while not mapObj.ready():
                # Get the report from the process queue on how many spacers they have scored since
                # we last looked
                # NOTE(review): this busy-waits and calls task_done() without a
                # matching get(); a Manager queue may raise ValueError if
                # task_done() is called more times than items were put — verify
                # against score_entry's queue usage.
                for _ in range(queue.qsize()):
                    queue.task_done()
            mapObj.wait()
            # sublist[0] holds the pool's aggregated results (see callback).
            spacerscores = np.asarray([x for x in sublist[0]])
            spacers["on_target_score"] = spacerscores
        elif rule_set.lower() == "azimuth":
            # Azimuth returns scores in [0, 1]; scale to 0-100.
            spacers["on_target_score"] = predict(
                spacers["spacer"].values) * 100
        elif rule_set.lower() == "none":
            # Same constant-score behavior as rule_set=None.
            spacers["on_target_score"] = (
                np.ones(shape=spacers["spacer"].values.shape, dtype=np.uint8) *
                100)
    # Strictly-greater comparison: rows exactly at the threshold are dropped.
    spacers = spacers[spacers["on_target_score"] > on_target_score_threshold]
    return spacers
Example #4
0
import numpy as np
from azimuth.model_comparison import predict
import contextlib


@contextlib.contextmanager
def redirect_stdout(target):
    """Temporarily replace ``sys.stdout`` with *target*, restoring it on exit.

    Note: the standard library provides an equivalent in
    ``contextlib.redirect_stdout``; this local version is kept for the
    existing callers.
    """
    import sys  # local import: 'sys' is not imported at the top of this file
    original = sys.stdout
    try:
        sys.stdout = target
        yield
    finally:
        # Restore even if the body raised, so stdout is never left captured.
        sys.stdout = original

# Lightweight CLI wrapper for running model predictions on sequence data
if __name__ == '__main__':
    # Local imports: neither argparse nor io is imported at the top of this
    # file, so the original script raised NameError at runtime.
    import argparse
    import io

    parser = argparse.ArgumentParser()
    parser.add_argument('--sequences', dest='sequences', action='store',
                        type=str, nargs='+', required=True)
    options = parser.parse_args()

    # Capture and suppress print output from model, so that we can cleanly
    # return results on stdout
    f = io.StringIO()
    with redirect_stdout(f):
        # Run model
        predictions = predict(np.array(options.sequences), None, None)

    # Print predictions to stdout for further use
    for seq, pred in zip(options.sequences, predictions):
        print("{} -> {}".format(seq, pred))
def predict_moreno(seq):
    """Score *seq* through the shared ``predict`` entry point, using the
    Moreno model file instead of the default model."""
    model_path = "moreno_model.pkl"
    return predict(
        seq, aa_cut=None, percent_peptide=None, model_file=model_path)