Example #1
0
def validate_sequences(sequences, regexp, name, minimum_CG, maximum_CG, maximum_offtarget, immuno):
    """
    Remove all bad sequences (siRNA) using the validators.

    Sequences are first screened with the cheap validators (GC content,
    immuno, thermostability). One ``offtarget_seed`` task per surviving
    sequence is then sent to the "blast" queue, and only sequences whose
    off-target value does not exceed ``maximum_offtarget`` are kept.

    :param sequences: iterable of candidate sequences.
    :param regexp: value stored as ``int(regexp)`` in every result entry.
    :param name: key of the returned dictionary.
    :param minimum_CG: lower bound passed to ``validate_gc_content``.
    :param maximum_CG: upper bound passed to ``validate_gc_content``.
    :param maximum_offtarget: highest accepted off-target value (inclusive).
    :param immuno: value passed through to ``validate_immuno``.
    :return: ``{name: [{"sequence": ..., "regexp": ..., "offtarget": ...}, ...]}``;
        the list is empty when no sequence passes.
    """

    # Filter by the inexpensive validators first. The survivors are stored in
    # a list because they are iterated twice below (task dispatch, then
    # pairing with the results); a lazy ``filter`` object would already be
    # exhausted on the second pass and the result would always be empty.
    preprocessed = [
        sequence
        for sequence in sequences
        if all(
            [
                validate_gc_content(sequence, minimum_CG, maximum_CG),
                validate_immuno(sequence, immuno),
                validate_thermostability(sequence),
            ]
        )
    ]
    # uncomment if debugging
    # return {
    #     name: [{
    #         "sequence": seq,
    #         "regexp": int(regexp),
    #         "offtarget": 0}
    #         for seq in preprocessed]
    # }

    # Nothing survived the cheap checks: skip dispatching an empty task group.
    if not preprocessed:
        return {name: []}

    # counting offtarget is expensive
    with allow_join_result():
        offtarget = (
            group(offtarget_seed.s(sequence).set(queue="blast") for sequence in preprocessed).apply_async().get()
        )

    # Results are paired with their sequences by position.
    return {
        name: [
            {"sequence": sequence, "regexp": int(regexp), "offtarget": actual_offtarget}
            for sequence, actual_offtarget in izip(preprocessed, offtarget)
            if actual_offtarget <= maximum_offtarget
        ]
    }
Example #2
0
 def test_validate_gc_content_not_in_range(self):
     # 'ACGT' is half G/C; the 52-70 window is expected to reject it.
     sequence, lower, upper = 'ACGT', 52, 70
     self.assertFalse(validators.validate_gc_content(sequence, lower, upper))
Example #3
0
 def test_validate_gc_content(self):
     # 'ACGT' is half G/C; the 40-60 window is expected to accept it.
     sequence, lower, upper = 'ACGT', 40, 60
     self.assertTrue(validators.validate_gc_content(sequence, lower, upper))