예제 #1
0
# Exercise 5b - Set complements

# You're processing some DNA sequences and you notice that some of them
# have been corrupted - they contain letters other than A,T,C and G!
# Write a regex that will let you remove the corrupted sequences.

from test_regex import test_regex

# Complemented character set: the pattern matches any single character
# that is not A, T, C or G, so it finds a hit in every corrupted sequence.
not_dna_regex = r'[^ATCG]'

# Arguments: label, pattern, strings expected to match, strings expected
# not to match.
test_regex(
    'Not DNA regex',
    not_dna_regex,
    ['ATCHGA', '#*@!'],
    ['AGGGGGCTAA', 'ACGAT'],
)

# Notice that your regex matches non-alphabetic characters as well!
예제 #2
0
# Exercise 7

import re
from test_regex import test_regex

# Write a regex that matches filenames ending in .doc or .odt

# An escaped (literal) dot, then either extension, anchored with $ so the
# extension has to sit at the end of the filename.
word_file_regex = r'\.(doc|odt)$'

# Arguments: label, pattern, names expected to match, names expected not
# to match.
test_regex(
    'Word file regex',
    word_file_regex,
    ['math_girls.doc', 'mathgirls2.odt'],
    ['other.ddt', 'odt', 'mathgirlsodt'],
)
예제 #3
0
# Exercise 5b - Set complements

# You're processing some DNA sequences and you notice that some of them 
# have been corrupted - they contain letters other than A,T,C and G! 
# Write a regex that will let you remove the corrupted sequences.

from test_regex import test_regex

# Complemented character set: the pattern matches any single character
# that is not A, T, C or G, so it finds a hit in every corrupted sequence.
not_dna_regex = r'[^ATCG]'

# Arguments: label, pattern, strings expected to match, strings expected
# not to match.
test_regex(
    'Not DNA regex',
    not_dna_regex,
    ['ATCHGA', '#*@!'],
    ['AGGGGGCTAA', 'ACGAT'],
)

# Notice that your regex matches non-alphabetic characters as well!
# Exercise 2 - Specifying custom sets of characters

# We saw some metacharacters in the previous slide: \w, \d, \s
# Define custom character ranges that are equivalent to these, 
# WITHOUT using metacharacters (escape sequences like \n are fine)

# Exercise 2a
# Equivalent of \d for ASCII text: the ten decimal digits.
digit_regex     = r'[0-9]'

# Exercise 2b
# Equivalent of \w (at least, for English): letters of either case,
# digits and the underscore.
wordlike_regex  = r'[a-zA-Z0-9_]'

# Exercise 2c
# Equivalent of \s for ASCII text: space, tab, newline, carriage return,
# form feed and vertical tab.
spacelike_regex = r'[ \t\n\r\f\v]'

# No need to edit below this point. Run this program to see if your answers were correct.
from test_regex import test_regex
# Each check passes a label, the pattern, and two plain strings.
# NOTE(review): the strings are presumably iterated one character at a
# time, first as expected matches and then as expected non-matches;
# confirm against test_regex.
test_regex(
    'Digit regex', digit_regex,
    '0123456789', 'aZ \t\n$_',
)
test_regex(
    'Wordlike regex', wordlike_regex,
    'ampBLY_058', ' \t\n-$',
)
test_regex(
    'Spacelike regex', spacelike_regex,
    ' \t\r\n', 'ahlB_-$9',
)
# Exercise 7

import re
from test_regex import test_regex

# Write a regex that matches filenames ending in .doc or .odt

# An escaped (literal) dot, then either extension, anchored with $ so the
# extension has to sit at the end of the filename. The former placeholder
# r'...' matched any three characters and so accepted 'other.ddt' and 'odt'.
word_file_regex = r'\.(doc|odt)$'

# Arguments: label, pattern, names expected to match, names expected not
# to match.
test_regex(
    'Word file regex',
    word_file_regex,
    ['math_girls.doc', 'mathgirls2.odt'],
    ['other.ddt', 'odt'],
)
예제 #6
0
import re
from test_regex import test_regex

# ---------------
#  Exercise 6a
# ---------------

# Write a regex that matches the words bad, baad, baaad 
# for any arbitrary number of a's

# b, one or more a's, then d. The \b word boundaries keep the pattern from
# matching inside a longer word such as 'sinbaaaad'.
animal_farm_bleat_regex = r'\bba+d\b'

test_regex(
    'Animal farm bleat',
    animal_farm_bleat_regex,
    # should match these
    ['baaaaaaad', 'This is baaaaaaaaaaad!'],
    # shouldn't match these
    ['bd', 'bacd', 'baaaaa', 'sinbaaaad'],
)
    

# ---------------
# Exercise 6b
# ---------------

# Write a regex that matches the word baaad (3 a's) up to baaaaaaaaaad (10 a's).

# b, between three and ten a's (inclusive), then d, as a whole word thanks
# to the \b word boundaries on both sides.
animal_farm_short_bleat_regex = r'\bba{3,10}d\b'

test_regex(
    'Animal farm short bleat',
    animal_farm_short_bleat_regex,
    # should match these
    ['baaaaaaad', 'This is baaaaaaaaaad!'],
    # not these
    ['bd', 'bacd', 'baaaaa', 'sinbaaaad', 'baad', 'baaaaaaaaaaaad'],
)


# ---------------
예제 #7
0
# Exercise 5c - Set complements

# You're looking at a list of purchases again. 
# Acme company uses product codes with A,B,C followed by three digits.
# Axeme company uses product codes X,Y,Z followed by three digits.

from test_regex import test_regex

# Write a regex to extract lines with just Acme and Axeme's product codes, 
# *using a set complement* rather than r'[ABCXYZ]...'

# Also, make sure that the product code occurs at the *end* of the line

# One character outside D-W (so A-C and X-Z qualify) followed by exactly
# three digits. The trailing $ anchors the code to the end of the line, as
# the exercise requires; without it a code in the middle of a line matches.
acme_axeme_regex = r'[^D-W]\d\d\d$'

# No need to edit below this line
# Lines that end in a valid Acme/Axeme product code.
matches = [
    'This line contains Acme product code C180',
    'This line contains Axeme product X007',
]
# Lines with a foreign code, a malformed code, or a code that is not at
# the end of the line.
nonmatches = [
    "This line contains Hugo's product code J982",
    'This line contains an incorrect Axeme product code Z48A',
    "This line contains Acme code C180 but it's not at the end.",
]
test_regex(
    'Acme/Axeme regex',
    acme_axeme_regex,
    matches,
    nonmatches,
)

# Notice that we did something a bit dangerous here - 
# the regex also matches product codes like '0000'.
# So, be careful when using set complements!

import re
from test_regex import test_regex

# ---------------
#  Exercise 6a
# ---------------

# Write a regex that matches the words bad, baad, baaad 
# for any arbitrary number of a's

# b, one or more a's, then d. The \b word boundaries keep the pattern from
# matching inside a longer word such as 'sinbaaaad'. The former placeholder
# r'...' matched any three characters.
animal_farm_bleat_regex = r'\bba+d\b'

test_regex(
    'Animal farm bleat',
    animal_farm_bleat_regex,
    # should match these
    ['baaaaaaad', 'This is baaaaaaaaaaad!'],
    # shouldn't match these
    ['bd', 'bacd', 'baaaaa', 'sinbaaaad'],
)
    

# ---------------
# Exercise 6b
# ---------------

# Write a regex that matches the word baaad (3 a's) up to baaaaaaaaaad (10 a's).

# b, between three and ten a's (inclusive), then d, as a whole word thanks
# to the \b word boundaries. The former placeholder r'...' matched any
# three characters.
animal_farm_short_bleat_regex = r'\bba{3,10}d\b'

test_regex(
    'Animal farm short bleat',
    animal_farm_short_bleat_regex,
    # should match these
    ['baaaaaaad', 'This is baaaaaaaaaad!'],
    # not these
    ['bd', 'bacd', 'baaaaa', 'sinbaaaad', 'baad', 'baaaaaaaaaaaad'],
)


# ---------------
예제 #9
0
# Exercise 2 - Specifying custom sets of characters

# We saw some metacharacters in the previous slide: \w, \d, \s
# Define custom character ranges that are equivalent to these, 
# WITHOUT using metacharacters (escape sequences like \n are fine)

# Exercise 2a
# Equivalent of \d for ASCII text: the ten decimal digits.
digit_regex     = r'[0-9]'

# Exercise 2b
# Equivalent of \w (at least, for English): letters of either case,
# digits and the underscore.
wordlike_regex  = r'[a-zA-Z0-9_]'

# Exercise 2c
# Equivalent of \s for ASCII text. Besides space, tab, newline and carriage
# return, \s also covers form feed (\f) and vertical tab (\v), so both are
# listed here.
spacelike_regex = r'[ \t\n\r\f\v]'

# No need to edit below this point. Run this program to see if your answers were correct.
from test_regex import test_regex
# Each check passes a label, the pattern, and two plain strings.
# NOTE(review): the strings are presumably iterated one character at a
# time, first as expected matches and then as expected non-matches;
# confirm against test_regex.
test_regex(
    'Digit regex', digit_regex,
    '0123456789', 'aZ \t\n$_',
)
test_regex(
    'Wordlike regex', wordlike_regex,
    'ampBLY_058', ' \t\n-$',
)
test_regex(
    'Spacelike regex', spacelike_regex,
    ' \t\r\n', 'ahlB_-$9',
)
예제 #10
0
# Exercise 5a - Set complements

from test_regex import test_regex

# Let's rearrange the metacharacter_regex from Exercise 4d.
# Why doesn't it work as we wanted?

# Deliberately left broken for the exercise: with ^ as the first character
# the set is complemented, so this matches anything other than $ and .
metacharacter_regex = r'[^$.]'
test_regex(
    'Metacharacter regex',
    metacharacter_regex,
    '^$.',
    'a1',
)

# Edit the metacharacter regex below to make it work,
# in some way OTHER than rearranging the characters again

# Every character is backslash-escaped, so the leading ^ is a literal caret
# instead of the complement marker.
edited_metacharacter_regex = r'[\^\$\.]'
test_regex(
    'Edited Metacharacter regex',
    edited_metacharacter_regex,
    '^$.',
    'a1',
)

# So, saying "within character sets [], metacharacters are matched
# literally, except backslashes" is true EXCEPT that
# ^ means complement if it's the first character [^...].
# Elsewhere in the set, it means the literal caret character.
예제 #11
0
# The purpose of this exercise is to illustrate that metacharacters such
# as $, ^ and . are matched as literal characters within character sets []

import re
from test_regex import test_regex

# Instructions: read the regex, and run the code to verify that
# it does indeed match the literal characters $, ^ and .

# A set holding the three literal characters $, ^ and . (the caret is not
# in first position, so it does not complement the set).
metacharacter_regex = r'[$^.]'

# Check that the pattern matches '^', '$' and '.' but not 'a' or '1'.
test_regex(
    'Metacharacter regex',
    metacharacter_regex,
    '^$.',
    'a1',
)
# Exercise 5a - Set complements
import re
from test_regex import test_regex
# Let's rearrange the metacharacter_regex from Exercise 4d. 
# Why doesn't it work as we wanted?

# Deliberately left broken for the exercise: with ^ as the first character
# the set is complemented, so this matches anything other than $ and .
metacharacter_regex = r'[^$.]'
test_regex(
    'Metacharacter regex',
    metacharacter_regex,
    '^$.',
    'a1',
)

# Edit the metacharacter regex below to make it work,
# in some way OTHER than rearranging the characters again

# Escaping the leading caret makes it a literal ^ rather than the
# complement marker, so the set matches exactly ^, $ and . without
# reordering the characters.
edited_metacharacter_regex = r'[\^$.]'
test_regex(
    'Edited Metacharacter regex',
    edited_metacharacter_regex,
    '^$.',
    'a1',
)

# So, saying "within character sets [], metacharacters are matched
# literally, except backslashes" is true EXCEPT that
# ^ means complement if it's the first character [^...].
# Elsewhere in the set, it means the literal caret character.
예제 #13
0
# Exercise 5c - Set complements

# You're looking at a list of purchases again.
# Acme company uses product codes with A,B,C followed by three digits.
# Axeme company uses product codes X,Y,Z followed by three digits.

from test_regex import test_regex

# Write a regex to extract lines with just Acme and Axeme's product codes,
# *using a set complement* rather than r'[ABCXYZ]...'

# Also, make sure that the product code occurs at the *end* of the line

# One character outside D-W (so A-C and X-Z qualify) followed by exactly
# three digits. The trailing $ anchors the code to the end of the line, as
# the exercise requires; without it a code in the middle of a line matches.
acme_axeme_regex = r'[^D-W]\d\d\d$'

# No need to edit below this line
# Lines that end in a valid Acme/Axeme product code.
matches = ['This line contains Acme product code C180',
           'This line contains Axeme product X007']
# Lines with a foreign code, a malformed code, or a code that is not at
# the end of the line.
nonmatches = ["This line contains Hugo's product code J982",
              'This line contains an incorrect Axeme product code Z48A',
              "This line contains Acme code C180 but it's not at the end."]
test_regex(
    'Acme/Axeme regex',
    acme_axeme_regex,
    matches,
    nonmatches,
)

# Notice that we did something a bit dangerous here -
# the regex also matches product codes like '0000'.
# So, be careful when using set complements!