Beispiel #1
0
def longest_and_shortest(data_list):
    """Sort ``data_list`` longest-first and scan each entry for a ``>`` marker.

    The list is reordered in place by descending entry length. Every entry
    is then searched for the text that follows a ``>`` character (with at
    most one whitespace character skipped after it).

    NOTE(review): the visible body never uses or returns the matches, so
    the function appears to be truncated; it still returns ``None``.

    Args:
        data_list: Mutable list of strings; it is sorted in place.
    """
    # The original sorted an undefined name ``data``; the parameter is meant.
    data_list.sort(key=len, reverse=True)

    # Compiled once, outside the loop (the original misspelled re.compile).
    regex = re.compile(r'>\s?(.*)')
    for line in data_list:
        # Identifier (or whatever text follows the '>' marker).
        matches = regex.findall(line)
 def freeTextTermQuery(self, string):
     """Run a free-text query made of several words.

     Every run of non-word characters and underscores in ``string`` is
     replaced by a single space. Each resulting word is looked up with
     ``self.oneWordKeyWordQuery`` and the combined, de-duplicated hits are
     passed to ``self.rankResults`` together with the normalised string.

     Args:
         string: Raw query text.

     Returns:
         Whatever ``self.rankResults`` returns for the unique hits.
     """
     # Raw string avoids the invalid-escape warning for ``\W``; the original
     # also misspelled ``re.compile``.
     pattern = re.compile(r'[\W_]+')
     string = pattern.sub(' ', string)
     result = []
     for word in string.split():
         result.extend(self.oneWordKeyWordQuery(word))
     return self.rankResults(list(set(result)), string)
def getInternalLinks(bs, includeUrl):
    """Collect the internal links found in a parsed page.

    Args:
        bs: Parsed document exposing ``findAll(tag, href=pattern)``
            (presumably a BeautifulSoup object -- confirm against caller).
        includeUrl: Any URL on the site; only its scheme and network
            location are used to build the site's base URL.

    Returns:
        A list of unique links in document order. Hrefs starting with
        ``/`` are prefixed with ``scheme://netloc``; other matching hrefs
        are returned unchanged.
    """
    parsed = urlparse(includeUrl)
    includeUrl = '{}://{}'.format(parsed.scheme, parsed.netloc)
    internalLinks = []
    # Match hrefs that begin with "/" or that contain the site's base URL.
    # re.escape stops the dots in the host name from matching any character.
    pattern = re.compile('^(/|.*' + re.escape(includeUrl) + ')')
    for link in bs.findAll('a', href=pattern):
        href = link.attrs['href']
        if href is None:
            continue
        if href.startswith('/'):
            href = includeUrl + href
        # Compare the resolved link, not the raw href: the original checked
        # the raw value, so root-relative links were never de-duplicated.
        if href not in internalLinks:
            internalLinks.append(href)
    return internalLinks
Beispiel #4
0
def assert_response(resp, contains=None, matches=None, headers=None, status='200'):
    """Assert that a response object looks the way a test expects.

    Args:
        resp: Response object exposing ``status``, ``data`` and ``headers``.
        contains: If truthy, a value that must be contained in ``resp.data``.
        matches: If truthy, a regular expression that must match at the
            start of ``resp.data``.
        headers: If truthy, the value ``resp.headers`` must equal (checked
            with ``assert_equal``).
        status: Text that must occur in ``resp.status``.

    Raises:
        AssertionError: If any of the requested checks fails.
    """
    assert status in resp.status, "Expected response %r not in %r" % (status, resp.status)

    if status == "200":
        assert resp.data, "Response data is empty."

    if contains:
        assert contains in resp.data, "Response does not contain %r" % contains

    if matches:
        # Compiled patterns expose ``match``; the original called the
        # non-existent ``matches`` (and misspelled ``re.compile``).
        reg = re.compile(matches)
        assert reg.match(resp.data), "Response does not match %r" % matches

    if headers:
        assert_equal(resp.headers, headers)
def episodeScraper(ep):
    """Load an episode page and scan its lyrics block for setting lines.

    The page at ``ep`` is rendered with PhantomJS, the ``div`` with class
    ``lyrics`` is split into lines, and every non-empty line made up only
    of uppercase letters, dots, colons and whitespace is recorded as the
    current setting.

    NOTE(review): the visible body ends inside the loop and returns
    nothing; ``charactercollection``, ``episode``, ``setting``, ``state``
    and ``count`` are never read here, so the function appears to be
    truncated. They are kept for the missing remainder.

    Args:
        ep: URL of the episode page.
    """
    # NOTE(review): machine-specific driver path; consider a parameter.
    driver = webdriver.PhantomJS('/Users/dheepan.ramanan/Documents/Resources/phantomjs-2.1.1-macosx/bin/phantomjs')
    driver.get(ep)
    b = bs(driver.page_source, 'html.parser')
    charactercollection = []
    body = b.find('div', attrs={'class': 'lyrics'})
    dialogue = re.split('\n', body.text)
    episode = re.sub('by.*', '', ep)
    # Matches any character that is NOT an uppercase letter, '.', ':' or
    # whitespace (the original misspelled re.compile).
    pattern = re.compile(r'[^A-Z.\s:]')
    state = 0
    count = 0
    for line in dialogue:
        if line == '':
            continue
        m = re.search(pattern, line)
        if m is None:
            # No disallowed character found: treat the line as a setting.
            setting = line
            state = 1
            count += 1
import os
import re

path = '/Volumes/Network/courses/sp/data/'

# Criterion a line of info.txt must meet for its speaker ID to be written.
# NOTE(review): r' ' looks like a placeholder; add a capturing group around
# the speaker ID once the real criterion is known.
match = re.compile(r' ')

with open(os.path.join(path, 'speakerids.txt'), 'w') as speakers:
    with open(os.path.join(path, 'info.txt'), 'r') as info:
        # The original tested the compiled pattern itself (always truthy),
        # never read info.txt, and called the non-existent ``group1()``.
        for line in info:
            found = match.search(line)
            if found:
                # write sID to file if it matches the criteria set above;
                # use group 1 when the pattern defines one, else the match.
                sid = found.group(1) if match.groups else found.group(0)
                speakers.write(path + sid + '\n')
Beispiel #7
0
""")

# Relation pattern: the word "in", not followed later by a word ending in
# "ing".
IN = re.compile(r'.*\bin\b(?!\b.+ing)')
for doc in nltk.corpus.ieer.parsed_docs('NYT_19980315'):
    for rel in nltk.sem.extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern=IN):
        print(nltk.sem.rtuple(rel))
print("-" * 40)

from nltk.corpus import conll2002
# Verbose pattern: one of four tagged verb forms, then anything, then
# "van/Prep". The alternatives must be separated by "|"; the original had
# the letter "I" in place of each bar, which turned the group into a single
# literal that could never match.
vnv = """
(
is/V|    # 3rd sing present and
was/V|   # past form of the verb zijn ('be')
werd/V|  # and also present
wordt/V  # past of worden ('become')
)
.*       # followed by anything
van/Prep # followed by van ('of')
"""
# The original misspelled re.compile.
VAN = re.compile(vnv, re.VERBOSE)
for doc in conll2002.chunked_sents('ned.train'):
    for r in nltk.sem.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN):
        print(nltk.sem.clause(r, relsym='VAN'))
print("-" * 40)

# Same relations again, printed as tuples with left and right context.
for doc in conll2002.chunked_sents('ned.train'):
    for r in nltk.sem.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN):
        print(nltk.sem.rtuple(r, lcon=True, rcon=True))
print("-" * 40)

Beispiel #8
0
from flask import Flask, request, redirect, render_template, session, flash
from mysqlconnection import MySQLConnector

from Tkinter import *
import re

app = Flask(__name__)
# NOTE(review): hard-coded session secret; load it from configuration
# before deploying.
app.secret_key = "ThisIsSecret"
# Database connector, created from the app and the 'friendsdb' database name.
mysql = MySQLConnector(app, 'friendsdb')
# Name validation: letters only (no spaces, no numbers). The original
# pattern '^[a-zA-Z]' checked just the first character and misspelled
# re.compile.
NAME_REGEX = re.compile(r'^[a-zA-Z]+$')
# Email validation. The original local-part class '[a-zA-Z0-9\+--]' was an
# accidental '+'..'-' range that rejected '.' and '_' and accepted ',';
# the trailing '*' also allowed an empty top-level domain.
EMAIL_REGEX = re.compile(r'^[a-zA-Z0-9.+_-]+@[a-zA-Z0-9._-]+\.[a-zA-Z]+$')

#Display ALL FRIENDS *** IS WORKING! DO NOT TOUCH THIS ***

@app.route('/', methods=['POST', 'GET'])
def index():
    """Render the index page with every row of the ``friends`` table.

    If the query fails, the error is logged, a message is flashed to the
    user and the page is rendered with an empty list.

    Returns:
        The rendered ``index.html`` template, with the rows passed as
        ``all_friends``.
    """
    friends = []  # fallback when the query fails
    try:
        query = "SELECT * FROM friends"
        friends = mysql.query_db(query)
    except Exception:
        # Best-effort page: keep serving, but record the failure instead of
        # discarding the exception silently.
        app.logger.exception("Could not load friends")
        flash("Friends are having issues. Try back later.")
    return render_template('index.html', all_friends=friends)

#DISPLAY SINGLE USER RECORD - *** IS WORKING! DO NOT TOUCH ***
Beispiel #9
0
import re

texto = '0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f'
# The original misspelled re.compile.
pattern9 = re.compile('9')
match1 = re.search(pattern9, texto)
# Show the start and end positions and the element that was found for the
# given expression. print(...) with one argument runs on Python 2 and 3.
print("Posicoes %s, %s; Valor: %s." % (match1.start(), match1.end(), match1.group(0)))

valores = re.findall('[a-f]', texto)
# Show every element of the text matched by the expression given to findall.
print("Valores: %s" % valores)
Beispiel #10
0
import re

roman_numeral_map = (...)

# Verbose pattern for a well-formed Roman numeral: up to three M's, then
# the hundreds, tens and ones places. The original misspelled re.compile.
roman_numeral_pattern = re.compile('''
    ^
    M{0,3}
    (CM|CD|D?C{0,3})
    (XC|XL|L?X{0,3})
    (IX|IV|V?I{0,3})
    $
''', re.VERBOSE)
import unittest


class knownValues(unittest.TestCase):

    known_values = ((1, 'I'),
                    (2, 'II'),
                    (3, 'III'),
                    (4, 'IV'),
                    (5, 'V'),
                    (6, 'VI'),
                    (7, 'VII'),
                    (8, 'VIII'),
                    (9, 'IX'),
                    (10, 'X'),
                    (50, 'L'),
                    (100, 'C'),
                    (500, 'D'),
                    (1000, 'M'),
Beispiel #11
0
"""
	23. セクション構造
	記事中に含まれるセクション名とそのレベル(例えば”== セクション名 ==”なら1)を表示せよ.
"""

from q20 import q20
import re

data = q20()

# Group 1: the leading run of '='; group 2: the section title up to the
# next '='. The original misspelled re.compile.
r = re.compile(r'(=+)([^=]*)=+')

for line in data.split('\n'):
    if not line.startswith('='):
        continue
    match = r.match(line)
    if match is None:
        # A line such as a lone "=" has no closing '=' run; skip it rather
        # than fail on match.group().
        continue
    # "== name ==" is level 1, so subtract one from the number of '='.
    level = len(match.group(1)) - 1
    text = match.group(2)
    print('{}\t{}'.format(level, text))
import re

# The ingredient line has a mixed fraction followed by a unit and an
# ingredient.
recipe_line = "1 1/12 ml flour"  # change to input statement in due course

# Whole number, one whitespace character, then numerator/denominator.
mixed_regex = r"\d{1,3}\s\d{1,3}\/\d{1,3}"
# The original misspelled re.compile.
compile_regex = re.compile(mixed_regex)

# Match once and reuse the result.
pre_mixed_num = compile_regex.match(recipe_line)
if pre_mixed_num:
    print("true")
    mixed_num = pre_mixed_num.group()

    # Turn "W N/D" into a decimal with plain arithmetic. This replaces
    # eval() on text that will eventually come from user input.
    whole, fraction = mixed_num.split()
    numerator, denominator = fraction.split("/")
    amount = int(whole) + int(numerator) / int(denominator)
    print(amount)

    # Get unit and ingredient: everything after the mixed number.
    print(compile_regex)
    unit_ingredient = recipe_line[pre_mixed_num.end():].strip()
    print(unit_ingredient)

    # Split the text at the first space: [unit, ingredient]. The original
    # called .split on the list returned by re.split, and did so outside
    # this branch where the name may be undefined.
    get_unit = unit_ingredient.split(" ", 1)
    print(get_unit)
Beispiel #13
0
spam002.txt, and so on, in a single folder and locates any gaps in the numbering
(such as if there is a spam001.txt and spam003.txt but no spam002.txt).
Have the program rename all the later files to close this gap.
As an added challenge, write another program that can insert gaps
into numbered files so that a new file can be added.
'''

#locates any gaps in the numbering

#rename all the later files to close this gap

import shutil, os, re

#finds all files with a given prefix in a single folder
# Create a regex that matches part of filename to be substituted
# One digit, an optional second digit (group 1), any single character and
# a hyphen at the start of the name. The original misspelled re.compile and
# wrote '\(d)', which is an unbalanced parenthesis and cannot compile.
namePattern = re.compile(r'^\d(\d)?.-.*?')

# Loop over the files in the working directory.
for filename in os.listdir(
        '/Users/caitlin/Documents/school/itc_110/abs_hw/ch9/waiteSmith/majorArcana'
):
    mo = namePattern.search(filename)

    # Skip files whose names do not match the numbering pattern.
    if mo is not None:
        print(filename)
'''
#TODO: Get the different parts of the filename.
    beforePart = mo.group(1)
    monthPart = mo.group(2)
    dayPart = mo.group(4)
 def separatewords(self, text):
     """Split ``text`` into lowercase words.

     Words are separated by runs of one or more non-word characters.

     Args:
         text: The text to split.

     Returns:
         A list of the non-empty words, lowercased, in order.
     """
     # '\W+' rather than '\W*': a pattern that can match the empty string
     # splits between every character on Python 3.7+. The original also
     # misspelled re.compile.
     splitter = re.compile(r'\W+')
     return [s.lower() for s in splitter.split(text) if s != '']
Beispiel #15
0
Have the program rename all the later files to close this gap.
As an added challenge, write another program that can insert gaps
into numbered files so that a new file can be added.
"""


# locates any gaps in the numbering

# rename all the later files to close this gap


import shutil, os, re

# finds all files with a given prefix in a single folder
# Create a regex that matches part of filename to be substituted
# One digit, an optional second digit (group 1), any single character and
# a hyphen at the start of the name. The original misspelled re.compile and
# wrote "\(d)", which is an unbalanced parenthesis and cannot compile.
namePattern = re.compile(r"^\d(\d)?.-.*?")

# Loop over the files in the working directory.
for filename in os.listdir("/Users/caitlin/Documents/school/itc_110/abs_hw/ch9/waiteSmith/majorArcana"):
    mo = namePattern.search(filename)

    # Skip files whose names do not match the numbering pattern.
    if mo is not None:
        print(filename)
"""
#TODO: Get the different parts of the filename.
    beforePart = mo.group(1)
    monthPart = mo.group(2)
    dayPart = mo.group(4)
    yearPart = mo.group(6)
    afterPart = mo.group(8)