Beispiel #1
0
def map(line):
    """Yield one ``(TITLE, LINK_TARGET)`` pair per wiki link found in *line*.

    The title is read from a ``<page><title>...<`` prefix that must sit at
    the very start of *line*; if that prefix is missing, the title is the
    empty string.  Every ``[[target]]`` or ``[[target|label]]`` occurrence
    contributes its target, i.e. the text between ``[[`` and the first
    ``]]``, cut at the first ``|``.  When a ``[[`` has no closing ``]]``,
    the text up to the next ``[[`` (or the end of the line) is used.

    Note: the function name shadows the builtin ``map``; it is kept because
    callers refer to it by this name.

    Args:
        line: One input record as a string.

    Yields:
        Tuples ``(title.upper(), target.upper())`` in order of appearance.
        Nothing is yielded when *line* contains no ``[[``.
    """
    # The result of this call is not used, exactly as in the original code.
    # NOTE(review): if ``ununicode`` returns a converted copy rather than
    # working through side effects, this call does not affect ``line`` --
    # confirm against the ``common`` module.
    common.ununicode(line)

    # Raw string keeps ``\s`` a regex escape instead of an invalid string
    # escape sequence.
    title_match = re.match(r'<page>\s*<title>(.*?)<', line)
    title = title_match.group(1) if title_match else ''
    title = title.upper()  # loop-invariant, so compute it once

    # The first piece is whatever precedes the first "[[" and is never a
    # link; a line without "[[" leaves nothing to iterate over.
    for piece in line.split('[[')[1:]:
        link_text = piece.partition(']]')[0]   # drop everything after "]]"
        target = link_text.partition('|')[0]   # drop the optional "|label"
        yield (title, target.upper())
Beispiel #2
0
def map(line):
    """Emit ``(TITLE, LINK_TARGET)`` for each ``[[...]]`` link in *line*.

    The title comes from a leading ``<page><title>...<`` fragment (matched
    only at the start of *line*); without it the title is ``''``.  For each
    ``[[`` in the line, the link target is the text that follows it up to
    the first ``]]``, truncated at the first ``|`` so that a display label
    (``[[target|label]]``) is ignored.  If no ``]]`` follows, the text up to
    the next ``[[`` or the end of the line is taken instead.

    Note: the function name shadows the builtin ``map``; it is kept because
    callers refer to it by this name.

    Args:
        line: One input record as a string.

    Yields:
        ``(title.upper(), target.upper())`` tuples, in the order the links
        occur.  A line without ``[[`` yields nothing.
    """
    # Return value intentionally left unused, matching the original code.
    # NOTE(review): presumably ``ununicode`` is expected to normalise the
    # text; since its result is discarded here, verify that it acts through
    # side effects, otherwise this call is a no-op for ``line``.
    common.ununicode(line)

    # Raw string: ``\s`` must reach the regex engine, not the string parser.
    found = re.match(r'<page>\s*<title>(.*?)<', line)
    upper_title = (found.group(1) if found else '').upper()

    # Splitting on the literal "[[" and skipping the first chunk discards
    # the text before the first link; no regex is needed for literals.
    chunks = line.split('[[')
    for chunk in chunks[1:]:
        inside_brackets = chunk.split(']]', 1)[0]
        target = inside_brackets.split('|', 1)[0]
        yield (upper_title, target.upper())
Beispiel #3
0
def map(line):
    """Yield a single ``("1", count)`` pair holding the word count of *line*.

    Words are the whitespace-separated tokens returned by ``line.split()``.
    The count is emitted as a decimal string under the constant key ``"1"``;
    an empty or all-whitespace line yields ``("1", "0")``.

    Note: the function name shadows the builtin ``map``; it is kept because
    callers refer to it by this name.

    Args:
        line: One input record as a string.

    Yields:
        Exactly one tuple ``("1", str(number_of_words))``.
    """
    # The result of this call is not used, exactly as in the original code.
    # NOTE(review): if ``ununicode`` returns a converted copy rather than
    # working through side effects, this call does not affect ``line`` --
    # confirm against the ``common`` module.
    common.ununicode(line)
    # len() of the token list replaces the hand-written counting loop.
    yield ("1", str(len(line.split())))
Beispiel #4
0
def map(line):
    """Emit the number of words in *line* as one ``("1", count)`` pair.

    The line is tokenised with ``line.split()`` (any run of whitespace
    separates words).  The number of tokens is converted to a string and
    yielded under the fixed key ``"1"``; a line with no tokens produces
    ``("1", "0")``.

    Note: the function name shadows the builtin ``map``; it is kept because
    callers refer to it by this name.

    Args:
        line: One input record as a string.

    Yields:
        Exactly one tuple ``("1", str(word_count))``.
    """
    # Return value intentionally left unused, matching the original code.
    # NOTE(review): presumably ``ununicode`` is expected to normalise the
    # text; since its result is discarded here, verify that it acts through
    # side effects, otherwise this call is a no-op for ``line``.
    common.ununicode(line)
    word_count = len(line.split())  # replaces the manual increment loop
    yield ("1", str(word_count))