def longest_and_shortest(data_list):
    """Sort ``data_list`` longest-first and scan every item for a ``>`` header.

    The list is sorted in place by item length, in descending order. Each item
    is then searched for the text that follows a ``>`` character (and at most
    one whitespace character after it).

    Args:
        data_list: Mutable list of strings; it is reordered in place.

    Returns:
        None. The visible code computes the per-item matches but does not
        return or store them.
    """
    # The original called ``data.sort(...)``, but no ``data`` name exists in
    # this function; the parameter being iterated below is ``data_list``.
    data_list.sort(key=len, reverse=True)

    # Compile once, outside the loop (originally misspelled ``re.complile``).
    regex = re.compile(r'>\s?(.*)')  # identifier, or whatever follows '>'
    for line in data_list:
        matches = regex.findall(line)
def freeTextTermQuery(self, string):
    """Run a one-word query for every word in ``string`` and rank the union.

    Runs of non-word characters and underscores are replaced by a single
    space before the text is split into words.

    Args:
        string: Free-text query.

    Returns:
        Whatever ``self.rankResults`` returns when given the de-duplicated
        list of hits and the cleaned query string.
    """
    # Raw string avoids the invalid-escape warning for ``\W``; the call was
    # originally misspelled ``re.complile``.
    pattern = re.compile(r'[\W_]+')
    string = pattern.sub(' ', string)

    result = []
    for word in string.split():
        result.extend(self.oneWordKeyWordQuery(word))

    # set() removes hits returned for more than one word.
    return self.rankResults(list(set(result)), string)
def getInternalLinks(bs, includeUrl):
    """Collect the internal links found on a parsed page.

    A link counts as internal when its ``href`` starts with ``/`` or contains
    the ``scheme://netloc`` part of ``includeUrl``.

    Args:
        bs: Parsed page object exposing ``findAll(tag, href=pattern)``.
        includeUrl: Any URL on the site; only its scheme and network location
            are used.

    Returns:
        List of unique links in page order. Hrefs starting with ``/`` are
        prefixed with ``scheme://netloc``.
    """
    parsed = urlparse(includeUrl)
    includeUrl = '{}://{}'.format(parsed.scheme, parsed.netloc)
    internalLinks = []

    # Finds all links that begin with a "/" or contain the site's base URL.
    # re.escape stops "." and similar characters in the URL from acting as
    # regex metacharacters. The call was originally misspelled ``re.complile``.
    linkPattern = re.compile('^(/|.*' + re.escape(includeUrl) + ')')

    for link in bs.findAll('a', href=linkPattern):
        href = link.attrs['href']
        if href is None:
            continue
        url = includeUrl + href if href.startswith('/') else href
        # Compare the normalised URL: the original compared the raw href, so
        # a repeated relative link was appended every time it appeared.
        if url not in internalLinks:
            internalLinks.append(url)
    return internalLinks
def assert_response(resp, contains=None, matches=None, headers=None, status='200'):
    """Assert that a response has the expected status, body and headers.

    Args:
        resp: Response object exposing ``status``, ``data`` and ``headers``.
        contains: If given, a value that must be contained in ``resp.data``.
        matches: If given, a regular expression that must match at the start
            of ``resp.data``.
        headers: If given, compared against ``resp.headers`` with
            ``assert_equal``.
        status: Text that must appear in ``resp.status``.

    Raises:
        AssertionError: When any requested check fails.
    """
    assert status in resp.status, "Expected response %r not in %r" % (status, resp.status)

    if status == "200":
        assert resp.data, "Response data is empty."

    if contains:
        assert contains in resp.data, "Response does not contain %r" % contains

    if matches:
        # Originally ``re.complile`` and ``reg.matches``: neither exists, so
        # this branch always raised AttributeError instead of checking.
        reg = re.compile(matches)
        assert reg.match(resp.data), "Response does not match %r" % matches

    if headers:
        assert_equal(resp.headers, headers)
def episodeScraper(ep):
    """Load an episode page and walk the lines of its ``lyrics`` div.

    The page at ``ep`` is fetched with PhantomJS and parsed. The text of the
    ``div`` with class ``lyrics`` is split into lines, and each non-empty line
    is tested against a pattern that looks for any character other than an
    upper-case letter, a dot, whitespace or a colon.

    Args:
        ep: URL of the episode page.

    Returns:
        None. The visible body only updates local state.
        NOTE(review): the function looks truncated in this view; confirm
        against the full source.
    """
    driver = webdriver.PhantomJS('/Users/dheepan.ramanan/Documents/Resources/phantomjs-2.1.1-macosx/bin/phantomjs')
    driver.get(ep)
    b = bs(driver.page_source, 'html.parser')
    charactercollection = []
    body = b.find('div', attrs={'class': 'lyrics'})
    dialogue = re.split('\n', body.text)
    episode = re.sub('by.*', '', ep)
    # Originally misspelled ``re.complile``, which raised AttributeError.
    pattern = re.compile(r'[^A-Z.\s:]')
    state = 0
    count = 0
    for line in dialogue:
        if line == '':
            continue
        m = pattern.search(line)
        # No match: the line has only upper-case letters, dots, whitespace
        # and colons, so it is recorded as the current setting.
        # NOTE(review): nesting of the three statements below is inferred
        # from the collapsed source; confirm.
        if m is None:
            setting = line
            state = 1
            count += 1
import os
import re

path = '/Volumes/Network/courses/sp/data/'

# Criteria a line of info.txt must meet (originally misspelled
# ``re.complile``).
# NOTE(review): r' ' looks like a placeholder; put the real criteria here,
# with a capturing group around the speaker id.
match = re.compile(r' ')

with open(os.path.join(path, 'speakerids.txt'), 'w') as speakers:
    with open(os.path.join(path, 'info.txt'), 'r') as info:
        # The original tested the compiled pattern itself (always truthy),
        # never read ``info``, and called the non-existent ``group1()``.
        for line in info:
            found = match.search(line)
            if found:
                # write sID to file if it matches the criteria set above;
                # fall back to the whole match while the pattern has no group.
                sid = found.group(1) if match.groups else found.group(0)
                speakers.write(path + sid + '\n')
""") IN = re.compile(r'.*\bin\b(?!\b.+ing)') for doc in nltk.corpus.ieer.parsed_docs('NYT_19980315'): for rel in nltk.sem.extract_rels('ORG', 'LOC', doc, corpus='ieer', pattern=IN): print(nltk.sem.rtuple(rel)) print("-" * 40) from nltk.corpus import conll2002 vnv = """ ( is/VI # 3rd sing present and was/VI # past form of the verb zijn ('be') werd/VI # and also present wordt/V #past of worden('become') ) .* # followed by anything van/Prep # followed by van ('of') """ VAN = re.complile(vnv, re.VERBOSE) for doc in conll2002.chunked_sents('ned.train'): for r in nltk.sem.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN): print(nltk.sem.clause(r, relsym='VAN')) print("-" * 40) for doc in conll2002.chunked_sents('ned.train'): for r in nltk.sem.extract_rels('PER', 'ORG', doc, corpus='conll2002', pattern=VAN): print(nltk.sem.rtuple(r, lcon=True, rcon=True)) print("-" * 40)
from flask import Flask, request, redirect, render_template, session, flash
from mysqlconnection import MySQLConnector
from Tkinter import *
import re

app = Flask(__name__)
# NOTE(review): the secret key is hard-coded in source; consider loading it
# from configuration instead.
app.secret_key = "ThisIsSecret"
mysql = MySQLConnector(app, 'friendsdb')  # DATABASE: connector bound to this app and 'friendsdb'

# Name regex, intended as "no spaces, no numbers" (originally misspelled
# ``re.complile``, which raised AttributeError at import time).
# NOTE(review): as written the pattern only checks that the FIRST character
# is a letter; confirm whether '^[a-zA-Z]+$' was intended.
NAME_REGEX = re.compile('^[a-zA-Z]')
EMAIL_REGEX = re.compile(r'^[a-zA-Z0-9\+--]+@[a-zA-Z0-9\._-]+\.[a-zA-Z]*$')


# Display ALL FRIENDS *** IS WORKING! DO NOT TOUCH THIS ***
@app.route('/', methods=['POST', 'GET'])
def index():
    """Render ``index.html`` with every row of the ``friends`` table.

    If the query raises, a flash message is queued and the page is rendered
    with an empty list.
    """
    friends = []  # fallback shown when the query fails
    try:
        query = "SELECT * FROM friends"
        friends = mysql.query_db(query)
    except Exception:
        # Best effort: keep the page usable and tell the user something
        # went wrong rather than returning an error page.
        flash("Friends are having issues. Try back later.")
    return render_template('index.html', all_friends=friends)


# DISPLAY SINGLE USER RECORD - *** IS WORKING! DO NOT TOUCH ***
import re

texto = '0,1,2,3,4,5,6,7,8,9,a,b,c,d,e,f'

# Originally misspelled ``re.complile``, which raised AttributeError.
pattern9 = re.compile('9')
match1 = re.search(pattern9, texto)
# Show the start position, the end position and the element that was found
# for the given expression. The parenthesised single-argument print runs
# unchanged on Python 2 and Python 3.
print("Posicoes %s, %s; Valor: %s." % (match1.start(), match1.end(), match1.group(0)))

valores = re.findall('[a-f]', texto)
# Show every element of the text matched by the expression passed to findall.
print("Valores: %s" % valores)
import re roman_numeral_map = (...) roman_numeral_pattern = re.complile(''' ^ M{0,3} (CM|CD|D?C{0,3}) (XC|XL|L?X{0,3}) (IX|IV|V?I{0,3}) $ ''', re.VERBOSE) import unittest class knownValues(unittest.TestCase): known_values = ((1, 'I'), (2, 'II'), (3, 'III'), (4, 'IV'), (5, 'V'), (6, 'VI'), (7, 'VII'), (8, 'VIII'), (9, 'IX'), (10, 'X'), (50, 'L'), (100, 'C'), (500, 'D'), (1000, 'M'),
"""
23. Section structure

Display the section names contained in the article together with their
levels (for example, "== Section name ==" is level 1).
"""
from q20 import q20
import re

data = q20()
# Group 1 is the leading run of "=", group 2 the section name.
# Originally misspelled ``re.complile``.
r = re.compile(r'(=+)([^=]*)=+')
for line in data.split('\n'):
    if not line.startswith('='):
        continue
    match = r.match(line)
    if match is None:
        # For example a lone "=": there is no closing run of "=", so the
        # line is not a heading. The original raised AttributeError here.
        continue
    level = len(match.group(1)) - 1
    text = match.group(2)
    print('{}\t{}'.format(level, text))
import re
from fractions import Fraction

# Ingredient line: a mixed fraction followed by a unit and an ingredient.
recipe_line = "1 1/12 ml flour"  # change to input statement in due course
mixed_regex = r"\d{1,3}\s\d{1,3}\/\d{1,3}"

# Get the mixed number by matching the regex at the start of the line.
pre_mixed_num = re.match(mixed_regex, recipe_line)
if pre_mixed_num:
    print("true")
    mixed_num = pre_mixed_num.group()

    # Convert "whole numerator/denominator" to a decimal. Fraction parses the
    # "n/d" part directly, so eval() on text that will later come from user
    # input is no longer needed.
    whole, fraction = mixed_num.split()
    amount = float(int(whole) + Fraction(fraction))
    print(amount)

    # Get unit and ingredient: everything after the mixed number.
    # Originally misspelled ``re.complile``.
    compile_regex = re.compile(mixed_regex)
    print(compile_regex)
    unit_ingredient = compile_regex.split(recipe_line, maxsplit=1)
    print(unit_ingredient)

    # re.split returns a list; the original called .split on the list itself.
    # Element 1 is the text after the amount; split it at the first space.
    get_unit = unit_ingredient[1].strip().split(" ", 1)
    print(get_unit)
spam002.txt, and so on, in a single folder and locates any gaps in the numbering (such as if there is a spam001.txt and spam003.txt but no spam002.txt). Have the program rename all the later files to close this gap. As an added challenge, write another program that can insert gaps into numbered files so that a new file can be added. ''' #locates any gaps in the numbering #rename all the later files to close this gap import shutil, os, re #finds all files with a given prefix in a single folder # Create a regex that matches part of filename to be substituted namePattern = re.complile(r'^\d\(d)?.-.*?') #TODO: Loop over the files in the working directory. for filename in os.listdir( '/Users/caitlin/Documents/school/itc_110/abs_hw/ch9/waiteSmith/majorArcana' ): mo = namePattern.search(filename) #TODO: Skip files without a date. if mo != None: print(filename) ''' #TODO: Get the different parts of the filename. beforePart = mo.group(1) monthPart = mo.group(2) dayPart = mo.group(4)
def separatewords(self, text):
    """Split ``text`` on non-word characters and return the words lower-cased.

    Args:
        text: String to split.

    Returns:
        List of the non-empty pieces of ``text``, each lower-cased.
    """
    # ``\W+`` rather than ``\W*``: a pattern that can match the empty string
    # makes ``split`` cut between every character on Python 3.7+. The call
    # was originally misspelled ``re.complile``.
    splitter = re.compile(r'\W+')
    return [s.lower() for s in splitter.split(text) if s != '']
Have the program rename all the later files to close this gap. As an added challenge, write another program that can insert gaps into numbered files so that a new file can be added. """ # locates any gaps in the numbering # rename all the later files to close this gap import shutil, os, re # finds all files with a given prefix in a single folder # Create a regex that matches part of filename to be substituted namePattern = re.complile(r"^\d\(d)?.-.*?") # TODO: Loop over the files in the working directory. for filename in os.listdir("/Users/caitlin/Documents/school/itc_110/abs_hw/ch9/waiteSmith/majorArcana"): mo = namePattern.search(filename) # TODO: Skip files without a date. if mo != None: print(filename) """ #TODO: Get the different parts of the filename. beforePart = mo.group(1) monthPart = mo.group(2) dayPart = mo.group(4) yearPart = mo.group(6) afterPart = mo.group(8)