def maskSequences(sequences, masker=None):
    '''hard-mask *sequences* and return the result as a list.

    *masker* selects how the sequences are prepared before lower-case
    characters are replaced by ``N``:
        None            * sequences are upper-cased, so nothing is
                          replaced afterwards
        softmask        * sequences are used as given, so their
                          lower-case characters become ``N``
        dust/dustmasker * upper-cased sequences are handed to
                          Masker.MaskerDustMasker.maskSequences
                          (presumably returns masked regions in
                          lower-case - confirm against Masker)

    Raises ValueError for any other value of *masker*.
    '''
    dust_names = ("dust", "dustmasker")

    if masker == "softmask":
        # input is taken to be soft-masked already: keep case untouched
        prepared = sequences
    elif masker in dust_names:
        dust = Masker.MaskerDustMasker()
        upper_cased = [seq.upper() for seq in sequences]
        prepared = dust.maskSequences(upper_cased)
    elif masker is None:
        prepared = [seq.upper() for seq in sequences]
    else:
        raise ValueError("unknown masker %s" % masker)

    # turn every remaining lower-case (soft-masked) character into N
    lower_case = re.compile("[a-z]")
    return [lower_case.sub("N", seq) for seq in prepared]
예제 #2
0
def maskSequences(sequences, masker):
    '''return *sequences* as a list with lower-case characters
    replaced by ``N``.

    *masker* selects how the sequences are prepared first:
        repeatmasker * sequences are used as given, so their
                       lower-case characters become ``N``
        dust         * each upper-cased sequence is passed through a
                       Masker.MaskerDustMasker instance, called once
                       per sequence
        other values * sequences are only upper-cased, so nothing is
                       replaced afterwards
    '''
    if masker == "dust":
        dust = Masker.MaskerDustMasker()
        prepared = [dust(seq.upper()) for seq in sequences]
    elif masker == "repeatmasker":
        # input is taken to be repeat-masked already: keep case untouched
        prepared = sequences
    else:
        prepared = [seq.upper() for seq in sequences]

    # turn every remaining lower-case (soft-masked) character into N
    lower_case = re.compile("[a-z]")
    return [lower_case.sub("N", seq) for seq in prepared]
예제 #3
0
class DustMaskerCheck(unittest.TestCase):
    mMasker = Masker.MaskerDustMasker()