def __init__(self, filename, rules):
    """Set up a record from its file name and the extraction rules.

    The part of ``filename`` before its first "." is kept as ``date``; the
    first four characters of that are parsed as the integer ``year`` and also
    name the sub-directory of ``DIR`` the file is opened from.

    Raises:
        ValueError: If ``filename`` contains no "." or its first four
            characters are not an integer.
    """
    stem = filename[:filename.index('.')]
    year_text = stem[0:4]

    self.date = stem
    self.year = int(year_text)
    # Files are grouped in one directory per year: DIR/<year>/<filename>
    self.f = RecordFile(DIR + year_text + '/' + filename)
    self.rules = rules
class Record:
    """Parsed view of one city record text file.

    A ``Record`` wraps a ``RecordFile`` and exposes the officials, the cabinet
    and the departments/boards/commissions found in it.  The simple
    properties (mayor, council president, clerk) are resolved through the
    per-year rules handed to the constructor; the rest are parsed from chunks
    of lines pulled out of the file.
    """

    # Abbreviations and PDF-to-text spacing artefacts, replaced in this order.
    _ABBREVIATIONS = (
        ("Rm.", "Room"),
        ("Sec'y.", "Secretary"),
        ("Exec.", "Executive"),
        ("Chrm.", "Chairman"),
        ("Asst.", "Assistant"),
        ("Ro om", "Room"),
        ("R oo m", "Room"),
        ("Act. Mgr.", "Account Manager"),
        ("DE PT", "DEPT"),
        ("CLEV ELA ND", "CLEVELAND"),
        ("COMMISS ION", "COMMISSION"),
    )

    # Known line fragments that are always continuations of the line above.
    _OUTLIER_LINES = (
        'Flr., Court Towers, 1200 Ontario',
        '',
        'Criminal Branch-Justice Center, 8th',
    )

    def __init__(self, filename, rules):
        """Create record class from filename and rules object.

        Args:
            filename: Name of the record file.  Everything before the first
                "." is stored as ``date``; its first four characters are
                parsed as ``year`` and select the sub-directory of ``DIR``.
            rules: Nested mapping ``rules[property_name][year]`` giving a
                list of expressions evaluated by ``get_prop``.

        Raises:
            ValueError: If ``filename`` has no "." or does not start with an
                integer year.
        """
        self.date = filename[:filename.index('.')]
        self.year = int(self.date[0:4])
        self.f = RecordFile(DIR + self.date[0:4] + '/' + filename)
        self.rules = rules

    @property
    def mayor(self):
        """Mayor, as resolved by the 'mayor' rules for this year."""
        return self.get_prop('mayor')

    @property
    def council_president(self):
        """Council president, resolved by the 'council_president' rules."""
        return self.get_prop('council_president')

    @property
    def clerk(self):
        """Clerk, as resolved by the 'clerk' rules for this year."""
        return self.get_prop('clerk')

    @property
    def council_members(self):
        """Return the council members as a dict keyed by name.

        The result has the form::

            {'Member name': {'address': '123 Street', 'zipcode': '12345'}}

        Lines of the form ``Name ........ Address`` are used when present;
        otherwise ``get_council_members`` is used as a fallback.

        TODO: this should eventually be a list of people objects
        (position = council member, address, zipcode).
        """
        # Grab 80 lines after the line containing "Residence" as a starting pt.
        lines = self.f.get_lines_after("Residence", 80)

        # Lines holding the name and address look like:
        # Council Member Name ..................... Address
        main_lines = [line.rstrip() for line in lines if "..." in line]

        # Sometimes the lines don't contain ellipses; use the alternative
        # parser in that case.
        if not main_lines:
            return self.get_council_members()

        # Lines ending in a five-digit zip code.
        zip_code_lines = [
            line.strip() for line in lines if re.search(r'\d{5}\n', line)
        ]

        council_members = {}
        # Zip codes are tracked separately because a member without an
        # address has no zip code line either.
        zipcode_ptr = 0
        for entry in main_lines:
            # name is everything before the first "..", address everything
            # after the last ".."
            name = entry[:entry.index('..')].rstrip()
            address = entry[entry.rfind('..') + 2:].strip()

            zipcode = ""
            # Guard against running out of zip code lines instead of raising.
            if address and zipcode_ptr < len(zip_code_lines):
                zipcode = zip_code_lines[zipcode_ptr]
                zipcode_ptr += 1

            council_members[name] = {'address': address, 'zipcode': zipcode}
        return council_members

    def get_council_members(self):
        """Alternative method of getting the council members.

        Used when there are no ellipses between name and address.  Only lines
        starting with a digit are considered; their first two characters are
        dropped, the name runs up to "P.O. Box" or the first digit, and the
        last five characters of the remainder are taken as the zip code.

        Returns:
            dict: ``{name: {'address': ..., 'zipcode': ...}}``
        """
        lines = self.f.get_lines_after("Name Residence", 25)
        # line[:1] (not line[0]) so that empty lines are skipped, not fatal.
        lines = [line.rstrip() for line in lines if line[:1].isdigit()]

        council_members = {}
        for line in lines:
            line = line[2:].strip()
            if "P.O. Box" in line:
                split_at = line.index("P.O. Box")
            else:
                first_digit = re.search(r'\d', line)
                # No digit at all: the whole line is the name, no address.
                split_at = first_digit.start() if first_digit else len(line)

            name = line[:split_at].rstrip()
            # Slice instead of str.replace so that a name or zip code that
            # happens to recur inside the address is left alone.
            rest = line[len(name):].strip()
            zipcode = rest[-5:]
            address = rest[:-5].strip()
            council_members[name] = {'address': address, 'zipcode': zipcode}
        return council_members

    @staticmethod
    def _merge_continuation_lines(lines):
        """Strip lines and fold comma-less lines into the line above them.

        A first line without a comma has nothing above it and is kept as is.
        """
        merged = []
        for raw in lines:
            line = raw.strip()
            if ',' in line or not merged:
                merged.append(line)
            else:
                merged[-1] = ' '.join([merged[-1], line])
        return merged

    @property
    def cabinet(self):
        """Return the mayor's cabinet as ``{name: [titles...]}``.

        Each comma-separated line becomes one entry: the first part is the
        key, the remaining parts the value.  A first part containing an en
        dash or underscore is recorded under the key "vacant".
        """
        lines = self.f.get_lines_between(
            "MAYOR", ["OFFICE OF", "DEPT.", "Ward\n", re.compile(r"\d{5}")])

        # Some titles don't fit on their line; combine them with the line
        # above.  (Done on a fresh list: removing from the list while
        # iterating it skipped lines.)
        lines = self._merge_continuation_lines(lines)

        titles_with_commas = ("Director", "Acting Director")
        cabinet = {}
        for line in lines:
            parts = [part.strip() for part in line.split(",")]
            if '\u2013' in parts[0] or '_' in parts[0]:
                parts[0] = "vacant"

            # "Director, Dept. of X" is one title; re-join it with what follows.
            contained = [part for part in parts if part in titles_with_commas]
            if contained:
                i = parts.index(contained[0])
                parts[i:i + 2] = [', '.join(parts[i:i + 2])]
            cabinet[parts[0]] = parts[1:]
        return cabinet

    def _extract_location(self, new_body):
        """Move a leading "Room ..." prefix of a body into ``location``.

        ``new_body['lines']`` is a list for departments and a single string
        for boards/commissions; both shapes are handled in place.
        """
        lines = new_body['lines']
        is_list = isinstance(lines, list)
        first = lines[0] if is_list else lines

        new_body["location"] = ''
        if first[:4] != "Room":
            return

        # The room is separated from the rest of the line by a hyphen or a
        # comma.  A hyphen is ignored when the line contains a known title.
        if '-' in first and not any(t in first for t in Person.titles):
            split_pos = first.index('-')
        else:
            split_pos = first.find(',')
        if split_pos == -1:
            # No separator: leave the line untouched rather than raising.
            return

        new_body["location"] = first[:split_pos]
        remainder = first[split_pos + 1:].strip()
        # Check the type of the *lines*, not of the dict, so a department
        # keeps its list of lines.
        if is_list:
            new_body["lines"][0] = remainder
        else:
            new_body["lines"] = remainder

    @property
    def departments(self):
        """Return all bodies grouped by kind.

        Returns:
            dict: ``{'departments': [...], 'boards': [...],
            'commissions': [...]}``.  A body whose name contains "DEPT" is a
            department, one containing "BOARD" a board, anything else a
            commission.
        """
        # approximately the correct chunk of lines
        lines = self.f.get_lines_between("MAYOR", "MUNICIPAL COURT")

        # clean out lines before the first heading and ward lines that made
        # it into the chunk because of the table layout of the pdf
        lines = self.clean_dept_lines(lines)
        if self.year == 1996:
            print(lines)  # debugging aid kept from the original

        # split into bodies (each dept, board, or commission); each body is
        # still a list of strings after this step
        new_bodies = []
        for body in self.split_depts(lines):
            new_body = self.create_new_body(body)
            self._extract_location(new_body)
            new_bodies.append(new_body)

        all_bodies = {"departments": [], "boards": [], "commissions": []}
        for body in new_bodies:
            if "DEPT" in body["name"]:
                kind = "departments"
            elif "BOARD" in body["name"]:
                kind = "boards"
            else:
                kind = "commissions"
            all_bodies[kind].append(body)

        all_bodies['boards'] = self.create_boards(all_bodies["boards"])
        all_bodies['commissions'] = self.create_commissions(
            all_bodies["commissions"])
        all_bodies['departments'] = self.create_departments(
            all_bodies['departments'])
        return all_bodies

    def create_new_body(self, body):
        """Split a body's first line into its name and the remaining text.

        The first line looks like "NAME OF DEPT/BOARD/COMMISSION - ...".
        Subtitles (in parentheses) get hyphens before and after them, so the
        split happens at the second hyphen when the line contains "(" and at
        the first hyphen otherwise.  With fewer hyphens than expected the
        last available one is used; with none the whole line is the name.

        Args:
            body: Non-empty list of lines.  It is not modified.

        Returns:
            dict: ``{'name': str, 'lines': ...}`` where ``lines`` is a list
            for departments and one space-joined string for boards and
            commissions (whitespace doesn't matter for those).
        """
        first_line = body[0]
        hyphens = [m.start() for m in re.finditer("-", first_line)]
        if hyphens:
            wanted = 1 if '(' in first_line else 0
            split_pos = hyphens[min(wanted, len(hyphens) - 1)]
            name = first_line[:split_pos].strip()
            remainder = first_line[split_pos + 1:].strip()
        else:
            name, remainder = first_line.strip(), ''

        lines = [remainder] + list(body[1:])
        if "DEPT" not in name:
            lines = ' '.join(lines)
        return {"name": name, "lines": lines}

    def create_departments(self, depts):
        """Turn basic department dicts into full department dicts.

        Args:
            depts: List of ``{'name', 'location', 'lines'}`` dicts, where
                ``lines`` holds every line between this department and the
                next.

        Returns:
            list: One dict per department with ``name``, ``location``,
            ``divisions``, ``members`` and, when an OFFICES section exists,
            ``offices``.  Lines before the first OFFICES/DIVISIONS header
            are parsed as department-level members.
        """
        new_depts = []
        for dept in depts:
            dept_name = dept['name']
            new_dept = {
                'name': dept_name,
                'location': dept.get('location', ''),
                'divisions': {},
                'members': [],
            }
            lines = dept['lines']
            for i, line in enumerate(lines):
                if Record.contains_with_spaces('OFFICES', line):
                    offices, office_members, leftovers = self.create_offices(
                        lines[i:], dept_name)
                    new_dept['offices'] = offices
                    # Keep the office members too; they used to be
                    # overwritten by the division members.
                    new_dept['members'] += office_members
                    if leftovers:
                        divs, div_members = self.create_divisions(
                            leftovers, dept_name)
                        new_dept['divisions'] = divs
                        new_dept['members'] += div_members
                    break
                if Record.contains_with_spaces('DIVISIONS', line):
                    divs, div_members = self.create_divisions(
                        lines[i:], dept_name)
                    new_dept['divisions'] = divs
                    # Extend, so members found above the header survive.
                    new_dept['members'] += div_members
                    break
                new_dept['members'].append(
                    Person.get_people(line, department=dept_name))
            new_depts.append(new_dept)
        return new_depts

    def clean_dept_lines(self, lines):
        """Normalise the raw chunk of department lines.

        Drops bare newline lines, strips whitespace, discards everything
        before the first line containing four consecutive capitals (the
        first heading), removes ward lines and expands abbreviations.
        Returns an empty list when no heading is found.
        """
        lines = [line.strip() for line in lines if line != "\n"]

        start = next(
            (i for i, line in enumerate(lines)
             if re.search('[A-Z]{4}', line)),
            len(lines))
        lines = lines[start:]

        # Ward lines, if they exist
        lines = self.remove_ward_lines(lines)
        # Replace abbreviations with the full word for more consistency
        return self.replace_abbreviations(lines)

    def replace_abbreviations(self, lines):
        """Expand abbreviations and repair split words, in place.

        Returns the same list object for convenience.
        """
        for i, line in enumerate(lines):
            for short, full in self._ABBREVIATIONS:
                line = line.replace(short, full)
            # Two passes: "_ _ _" needs the second one to close fully.
            line = line.replace("_ _", "__", 10)
            line = line.replace("_ _", "__", 10)
            lines[i] = line
        return lines

    def create_boards(self, boards):
        """Replace each board's ``lines`` with a parsed ``members`` entry.

        The board dicts are modified in place and the list is returned.
        """
        boards = self.clean_boards(boards)
        for board in boards:
            board['members'] = Person.get_people(board['lines'],
                                                 department=board['name'])
            del board['lines']
        return boards

    def clean_boards(self, boards):
        """Hook for board clean-up; currently returns ``boards`` unchanged."""
        return boards

    def create_commissions(self, commissions):
        """Commissions are parsed exactly like boards."""
        return self.create_boards(commissions)

    def remove_ward_lines(self, lines):
        """Remove the first "Ward" line and the digit-ending lines after it.

        Works on both raw lines ("Ward\\n") and already stripped lines
        ("Ward"); the latter is what ``clean_dept_lines`` passes in.  The
        input list is returned untouched when there is no ward line,
        otherwise a new list is returned.
        """
        stripped = [line.strip() for line in lines]
        if "Ward" not in stripped:
            return lines

        start = stripped.index("Ward")
        end = start + 1
        while end < len(lines) and re.search(r'\d$', stripped[end]):
            end += 1
        return lines[:start] + lines[end:]

    def split_depts(self, lines):
        """Group lines into bodies (departments, boards, commissions).

        A line with four consecutive capitals that is not a DIVISIONS or
        OFFICES header starts a new body.  Incomplete lines (see
        ``is_incomplete_line``) are appended to the previous line of the
        current body.

        Returns:
            list: List of bodies, each a non-empty list of strings.
        """
        depts = []
        current_dept = []
        previous = ''
        for line in lines:
            # start of a new thing -
            # "DEPT OF..." or "BOARD OF..." or "SOMETHING COMMISSION"
            starts_new_body = (
                re.search('[A-Z]{4}', line)
                and current_dept
                and not Record.contains_with_spaces('DIVISIONS', line)
                and not Record.contains_with_spaces('OFFICES', line))
            if starts_new_body:
                depts.append(current_dept)
                current_dept, previous = [], ''

            # A continuation needs a line to continue; the first line of a
            # body is always kept on its own.
            if current_dept and self.is_incomplete_line(line, previous):
                if self.year == 1996:
                    print(line)  # debugging aid kept from the original
                # get rid of "|||"s that are being used as spacing
                fragment = line.replace('|', '').strip()
                current_dept[-1] = ' '.join([current_dept[-1], fragment])
            else:
                current_dept.append(line.strip())
            previous = line.strip()

        # add the last body to the output
        if current_dept:
            depts.append(current_dept)
        return depts

    def is_incomplete_line(self, current, previous):
        """Return whether a line in a dept/board/commission is incomplete.

        A line is incomplete when it contains "|" (spacing artefact of the
        pdf-to-txt conversion), follows a line ending in a comma, has fewer
        than three spaces (unless it mentions DIVISION), starts with three
        digits (an address), or is one of the known outlier fragments.
        DIVISIONS and OFFICES headers are never incomplete.
        """
        if self.contains_with_spaces('DIVISIONS', current) or \
                self.contains_with_spaces('OFFICES', current):
            return False

        tabbed_line = '|' in current
        line_following_comma = previous.endswith(',')
        not_enough_words = current.count(' ') < 3 and \
            not self.contains_with_spaces('DIVISION', current)
        starts_with_digits = re.match(r'\d{3}', current) is not None

        return tabbed_line or line_following_comma or not_enough_words or \
            starts_with_digits or (current in self._OUTLIER_LINES)

    def clean_sub_depts(self, lines):
        """Strip the "DIVISIONS"/"OFFICES" header from a sub-section.

        The header looks like "DIVISIONS -", "DIVISIONS \\" or "DIVISIONS:".
        Whatever follows the punctuation on the header line is kept only if
        it contains at least three spaces.  Empty lines are dropped.  The
        input list is not modified.
        """
        if not lines:
            return []

        header = lines[0]
        if '-' in header:
            split_char = '-'
        elif '\\' in header:
            split_char = '\\'
        else:
            split_char = ':'

        _, found, after = header.partition(split_char)
        first = after.strip() if found else header.strip()

        cleaned = list(lines[1:])
        if first.count(' ') >= 3:
            cleaned.insert(0, first)
        return [line for line in cleaned if line != '']

    @staticmethod
    def _split_unit_line(line):
        """Split "Unit <sep> person text" into ``(unit, person)``.

        The separator is the first "-", else the first backslash, else the
        first comma.  Only the first occurrence splits the line, so hyphens
        inside the person text are preserved.  Without any separator the
        whole line is the unit and the person text is empty.
        """
        for sep in ('-', '\\', ','):
            if sep in line:
                unit, _, person = line.partition(sep)
                return unit.strip(), person.strip()
        return line.strip(), ''

    def create_offices(self, lines, dept_name):
        """Parse an OFFICES section.

        Returns:
            tuple: ``(offices, people, leftovers)`` where ``leftovers`` are
            the lines from the DIVISIONS header onwards, or an empty list
            when the section has no DIVISIONS header.
        """
        offices, people, leftovers = [], [], []
        lines = self.clean_sub_depts(lines)
        for i, line in enumerate(lines):
            if Record.contains_with_spaces('DIVISIONS', line):
                leftovers = lines[i:]
                break
            office, person = self._split_unit_line(line)
            offices.append(office)
            people.append(
                Person.get_people(person,
                                  department=dept_name,
                                  division=office))
        return offices, people, leftovers

    def create_divisions(self, lines, dept_name):
        """Parse a DIVISIONS section into ``(divisions, people)``."""
        divs, people = [], []
        for line in self.clean_sub_depts(lines):
            division, person = self._split_unit_line(line)
            divs.append(division)
            people.append(
                Person.get_people(person,
                                  department=dept_name,
                                  division=division))
        return divs, people

    @staticmethod
    def contains_with_spaces(keyword, line):
        """Return whether ``keyword`` occurs in ``line`` ignoring spaces."""
        return keyword in line.replace(' ', '')

    def get_prop(self, name):
        """Resolve a property through the rules for this record's year.

        With a single rule its result is returned as is.  With several, the
        first result not in ``INVALID_VALS`` wins; "Not found" is returned
        when every rule yields an invalid value.
        """
        rules = self.rules[name][self.year]
        # NOTE(security): each rule is evaluated with eval() as an attribute
        # expression on the record file.  Rules must come from trusted
        # project configuration only, never from external input.
        if len(rules) == 1:
            return eval('self.f.' + rules[0])
        for rule in rules:
            attempt = eval('self.f.' + rule)
            if attempt not in INVALID_VALS:
                return attempt
        return "Not found"

    def __repr__(self):
        return "<Record, date=" + self.date + ">"
class Record:
    """Parsed view of one city record text file.

    A ``Record`` wraps a ``RecordFile`` and exposes the officials, the cabinet
    and the departments/boards/commissions found in it.  The simple
    properties (mayor, council president, clerk) are resolved through the
    per-year rules handed to the constructor; the rest are parsed from chunks
    of lines pulled out of the file.
    """

    # Abbreviations and PDF-to-text spacing artefacts, replaced in this order.
    _ABBREVIATIONS = (
        ("Rm.", "Room"),
        ("Sec'y.", "Secretary"),
        ("Exec.", "Executive"),
        ("Chrm.", "Chairman"),
        ("Asst.", "Assistant"),
        ("Ro om", "Room"),
        ("R oo m", "Room"),
        ("Act. Mgr.", "Account Manager"),
        ("DE PT", "DEPT"),
        ("CLEV ELA ND", "CLEVELAND"),
        ("COMMISS ION", "COMMISSION"),
    )

    # Known line fragments that are always continuations of the line above.
    _OUTLIER_LINES = (
        'Flr., Court Towers, 1200 Ontario',
        '',
        'Criminal Branch-Justice Center, 8th',
    )

    def __init__(self, filename, rules):
        """Create record class from filename and rules object.

        Args:
            filename: Name of the record file.  Everything before the first
                "." is stored as ``date``; its first four characters are
                parsed as ``year`` and select the sub-directory of ``DIR``.
            rules: Nested mapping ``rules[property_name][year]`` giving a
                list of expressions evaluated by ``get_prop``.

        Raises:
            ValueError: If ``filename`` has no "." or does not start with an
                integer year.
        """
        self.date = filename[:filename.index('.')]
        self.year = int(self.date[0:4])
        self.f = RecordFile(DIR + self.date[0:4] + '/' + filename)
        self.rules = rules

    @property
    def mayor(self):
        """Mayor, as resolved by the 'mayor' rules for this year."""
        return self.get_prop('mayor')

    @property
    def council_president(self):
        """Council president, resolved by the 'council_president' rules."""
        return self.get_prop('council_president')

    @property
    def clerk(self):
        """Clerk, as resolved by the 'clerk' rules for this year."""
        return self.get_prop('clerk')

    @property
    def council_members(self):
        """Return the council members as a dict keyed by name.

        The result has the form::

            {'Member name': {'address': '123 Street', 'zipcode': '12345'}}

        Lines of the form ``Name ........ Address`` are used when present;
        otherwise ``get_council_members`` is used as a fallback.

        TODO: this should eventually be a list of people objects
        (position = council member, address, zipcode).
        """
        # Grab 80 lines after the line containing "Residence" as a starting pt.
        lines = self.f.get_lines_after("Residence", 80)

        # Lines holding the name and address look like:
        # Council Member Name ..................... Address
        main_lines = [line.rstrip() for line in lines if "..." in line]

        # Sometimes the lines don't contain ellipses; use the alternative
        # parser in that case.
        if not main_lines:
            return self.get_council_members()

        # Lines ending in a five-digit zip code.
        zip_code_lines = [
            line.strip() for line in lines if re.search(r'\d{5}\n', line)
        ]

        council_members = {}
        # Zip codes are tracked separately because a member without an
        # address has no zip code line either.
        zipcode_ptr = 0
        for entry in main_lines:
            # name is everything before the first "..", address everything
            # after the last ".."
            name = entry[:entry.index('..')].rstrip()
            address = entry[entry.rfind('..') + 2:].strip()

            zipcode = ""
            # Guard against running out of zip code lines instead of raising.
            if address and zipcode_ptr < len(zip_code_lines):
                zipcode = zip_code_lines[zipcode_ptr]
                zipcode_ptr += 1

            council_members[name] = {'address': address, 'zipcode': zipcode}
        return council_members

    def get_council_members(self):
        """Alternative method of getting the council members.

        Used when there are no ellipses between name and address.  Only lines
        starting with a digit are considered; their first two characters are
        dropped, the name runs up to "P.O. Box" or the first digit, and the
        last five characters of the remainder are taken as the zip code.

        Returns:
            dict: ``{name: {'address': ..., 'zipcode': ...}}``
        """
        lines = self.f.get_lines_after("Name Residence", 25)
        # line[:1] (not line[0]) so that empty lines are skipped, not fatal.
        lines = [line.rstrip() for line in lines if line[:1].isdigit()]

        council_members = {}
        for line in lines:
            line = line[2:].strip()
            if "P.O. Box" in line:
                split_at = line.index("P.O. Box")
            else:
                first_digit = re.search(r'\d', line)
                # No digit at all: the whole line is the name, no address.
                split_at = first_digit.start() if first_digit else len(line)

            name = line[:split_at].rstrip()
            # Slice instead of str.replace so that a name or zip code that
            # happens to recur inside the address is left alone.
            rest = line[len(name):].strip()
            zipcode = rest[-5:]
            address = rest[:-5].strip()
            council_members[name] = {'address': address, 'zipcode': zipcode}
        return council_members

    @staticmethod
    def _merge_continuation_lines(lines):
        """Strip lines and fold comma-less lines into the line above them.

        A first line without a comma has nothing above it and is kept as is.
        """
        merged = []
        for raw in lines:
            line = raw.strip()
            if ',' in line or not merged:
                merged.append(line)
            else:
                merged[-1] = ' '.join([merged[-1], line])
        return merged

    @property
    def cabinet(self):
        """Return the mayor's cabinet as ``{name: [titles...]}``.

        Each comma-separated line becomes one entry: the first part is the
        key, the remaining parts the value.  A first part containing an en
        dash or underscore is recorded under the key "vacant".
        """
        lines = self.f.get_lines_between(
            "MAYOR", ["OFFICE OF", "DEPT.", "Ward\n", re.compile(r"\d{5}")])

        # Some titles don't fit on their line; combine them with the line
        # above.  (Done on a fresh list: removing from the list while
        # iterating it skipped lines.)
        lines = self._merge_continuation_lines(lines)

        titles_with_commas = ("Director", "Acting Director")
        cabinet = {}
        for line in lines:
            parts = [part.strip() for part in line.split(",")]
            if '\u2013' in parts[0] or '_' in parts[0]:
                parts[0] = "vacant"

            # "Director, Dept. of X" is one title; re-join it with what follows.
            contained = [part for part in parts if part in titles_with_commas]
            if contained:
                i = parts.index(contained[0])
                parts[i:i + 2] = [', '.join(parts[i:i + 2])]
            cabinet[parts[0]] = parts[1:]
        return cabinet

    def _extract_location(self, new_body):
        """Move a leading "Room ..." prefix of a body into ``location``.

        ``new_body['lines']`` is a list for departments and a single string
        for boards/commissions; both shapes are handled in place.
        """
        lines = new_body['lines']
        is_list = isinstance(lines, list)
        first = lines[0] if is_list else lines

        new_body["location"] = ''
        if first[:4] != "Room":
            return

        # The room is separated from the rest of the line by a hyphen or a
        # comma.  A hyphen is ignored when the line contains a known title.
        if '-' in first and not any(t in first for t in Person.titles):
            split_pos = first.index('-')
        else:
            split_pos = first.find(',')
        if split_pos == -1:
            # No separator: leave the line untouched rather than raising.
            return

        new_body["location"] = first[:split_pos]
        remainder = first[split_pos + 1:].strip()
        # Check the type of the *lines*, not of the dict, so a department
        # keeps its list of lines.
        if is_list:
            new_body["lines"][0] = remainder
        else:
            new_body["lines"] = remainder

    @property
    def departments(self):
        """Return all bodies grouped by kind.

        Returns:
            dict: ``{'departments': [...], 'boards': [...],
            'commissions': [...]}``.  A body whose name contains "DEPT" is a
            department, one containing "BOARD" a board, anything else a
            commission.
        """
        # approximately the correct chunk of lines
        lines = self.f.get_lines_between("MAYOR", "MUNICIPAL COURT")

        # clean out lines before the first heading and ward lines that made
        # it into the chunk because of the table layout of the pdf
        lines = self.clean_dept_lines(lines)
        if self.year == 1996:
            print(lines)  # debugging aid kept from the original

        # split into bodies (each dept, board, or commission); each body is
        # still a list of strings after this step
        new_bodies = []
        for body in self.split_depts(lines):
            new_body = self.create_new_body(body)
            self._extract_location(new_body)
            new_bodies.append(new_body)

        all_bodies = {"departments": [], "boards": [], "commissions": []}
        for body in new_bodies:
            if "DEPT" in body["name"]:
                kind = "departments"
            elif "BOARD" in body["name"]:
                kind = "boards"
            else:
                kind = "commissions"
            all_bodies[kind].append(body)

        all_bodies['boards'] = self.create_boards(all_bodies["boards"])
        all_bodies['commissions'] = self.create_commissions(
            all_bodies["commissions"])
        all_bodies['departments'] = self.create_departments(
            all_bodies['departments'])
        return all_bodies

    def create_new_body(self, body):
        """Split a body's first line into its name and the remaining text.

        The first line looks like "NAME OF DEPT/BOARD/COMMISSION - ...".
        Subtitles (in parentheses) get hyphens before and after them, so the
        split happens at the second hyphen when the line contains "(" and at
        the first hyphen otherwise.  With fewer hyphens than expected the
        last available one is used; with none the whole line is the name.

        Args:
            body: Non-empty list of lines.  It is not modified.

        Returns:
            dict: ``{'name': str, 'lines': ...}`` where ``lines`` is a list
            for departments and one space-joined string for boards and
            commissions (whitespace doesn't matter for those).
        """
        first_line = body[0]
        hyphens = [m.start() for m in re.finditer("-", first_line)]
        if hyphens:
            wanted = 1 if '(' in first_line else 0
            split_pos = hyphens[min(wanted, len(hyphens) - 1)]
            name = first_line[:split_pos].strip()
            remainder = first_line[split_pos + 1:].strip()
        else:
            name, remainder = first_line.strip(), ''

        lines = [remainder] + list(body[1:])
        if "DEPT" not in name:
            lines = ' '.join(lines)
        return {"name": name, "lines": lines}

    def create_departments(self, depts):
        """Turn basic department dicts into full department dicts.

        Args:
            depts: List of ``{'name', 'location', 'lines'}`` dicts, where
                ``lines`` holds every line between this department and the
                next.

        Returns:
            list: One dict per department with ``name``, ``location``,
            ``divisions``, ``members`` and, when an OFFICES section exists,
            ``offices``.  Lines before the first OFFICES/DIVISIONS header
            are parsed as department-level members.
        """
        new_depts = []
        for dept in depts:
            dept_name = dept['name']
            new_dept = {
                'name': dept_name,
                'location': dept.get('location', ''),
                'divisions': {},
                'members': [],
            }
            lines = dept['lines']
            for i, line in enumerate(lines):
                if Record.contains_with_spaces('OFFICES', line):
                    offices, office_members, leftovers = self.create_offices(
                        lines[i:], dept_name)
                    new_dept['offices'] = offices
                    # Keep the office members too; they used to be
                    # overwritten by the division members.
                    new_dept['members'] += office_members
                    if leftovers:
                        divs, div_members = self.create_divisions(
                            leftovers, dept_name)
                        new_dept['divisions'] = divs
                        new_dept['members'] += div_members
                    break
                if Record.contains_with_spaces('DIVISIONS', line):
                    divs, div_members = self.create_divisions(
                        lines[i:], dept_name)
                    new_dept['divisions'] = divs
                    # Extend, so members found above the header survive.
                    new_dept['members'] += div_members
                    break
                new_dept['members'].append(
                    Person.get_people(line, department=dept_name))
            new_depts.append(new_dept)
        return new_depts

    def clean_dept_lines(self, lines):
        """Normalise the raw chunk of department lines.

        Drops bare newline lines, strips whitespace, discards everything
        before the first line containing four consecutive capitals (the
        first heading), removes ward lines and expands abbreviations.
        Returns an empty list when no heading is found.
        """
        lines = [line.strip() for line in lines if line != "\n"]

        start = next(
            (i for i, line in enumerate(lines)
             if re.search('[A-Z]{4}', line)),
            len(lines))
        lines = lines[start:]

        # Ward lines, if they exist
        lines = self.remove_ward_lines(lines)
        # Replace abbreviations with the full word for more consistency
        return self.replace_abbreviations(lines)

    def replace_abbreviations(self, lines):
        """Expand abbreviations and repair split words, in place.

        Returns the same list object for convenience.
        """
        for i, line in enumerate(lines):
            for short, full in self._ABBREVIATIONS:
                line = line.replace(short, full)
            # Two passes: "_ _ _" needs the second one to close fully.
            line = line.replace("_ _", "__", 10)
            line = line.replace("_ _", "__", 10)
            lines[i] = line
        return lines

    def create_boards(self, boards):
        """Replace each board's ``lines`` with a parsed ``members`` entry.

        The board dicts are modified in place and the list is returned.
        """
        boards = self.clean_boards(boards)
        for board in boards:
            board['members'] = Person.get_people(board['lines'],
                                                 department=board['name'])
            del board['lines']
        return boards

    def clean_boards(self, boards):
        """Hook for board clean-up; currently returns ``boards`` unchanged."""
        return boards

    def create_commissions(self, commissions):
        """Commissions are parsed exactly like boards."""
        return self.create_boards(commissions)

    def remove_ward_lines(self, lines):
        """Remove the first "Ward" line and the digit-ending lines after it.

        Works on both raw lines ("Ward\\n") and already stripped lines
        ("Ward"); the latter is what ``clean_dept_lines`` passes in.  The
        input list is returned untouched when there is no ward line,
        otherwise a new list is returned.
        """
        stripped = [line.strip() for line in lines]
        if "Ward" not in stripped:
            return lines

        start = stripped.index("Ward")
        end = start + 1
        while end < len(lines) and re.search(r'\d$', stripped[end]):
            end += 1
        return lines[:start] + lines[end:]

    def split_depts(self, lines):
        """Group lines into bodies (departments, boards, commissions).

        A line with four consecutive capitals that is not a DIVISIONS or
        OFFICES header starts a new body.  Incomplete lines (see
        ``is_incomplete_line``) are appended to the previous line of the
        current body.

        Returns:
            list: List of bodies, each a non-empty list of strings.
        """
        depts = []
        current_dept = []
        previous = ''
        for line in lines:
            # start of a new thing -
            # "DEPT OF..." or "BOARD OF..." or "SOMETHING COMMISSION"
            starts_new_body = (
                re.search('[A-Z]{4}', line)
                and current_dept
                and not Record.contains_with_spaces('DIVISIONS', line)
                and not Record.contains_with_spaces('OFFICES', line))
            if starts_new_body:
                depts.append(current_dept)
                current_dept, previous = [], ''

            # A continuation needs a line to continue; the first line of a
            # body is always kept on its own.
            if current_dept and self.is_incomplete_line(line, previous):
                if self.year == 1996:
                    print(line)  # debugging aid kept from the original
                # get rid of "|||"s that are being used as spacing
                fragment = line.replace('|', '').strip()
                current_dept[-1] = ' '.join([current_dept[-1], fragment])
            else:
                current_dept.append(line.strip())
            previous = line.strip()

        # add the last body to the output
        if current_dept:
            depts.append(current_dept)
        return depts

    def is_incomplete_line(self, current, previous):
        """Return whether a line in a dept/board/commission is incomplete.

        A line is incomplete when it contains "|" (spacing artefact of the
        pdf-to-txt conversion), follows a line ending in a comma, has fewer
        than three spaces (unless it mentions DIVISION), starts with three
        digits (an address), or is one of the known outlier fragments.
        DIVISIONS and OFFICES headers are never incomplete.
        """
        if self.contains_with_spaces('DIVISIONS', current) or \
                self.contains_with_spaces('OFFICES', current):
            return False

        tabbed_line = '|' in current
        line_following_comma = previous.endswith(',')
        not_enough_words = current.count(' ') < 3 and \
            not self.contains_with_spaces('DIVISION', current)
        starts_with_digits = re.match(r'\d{3}', current) is not None

        return tabbed_line or line_following_comma or not_enough_words or \
            starts_with_digits or (current in self._OUTLIER_LINES)

    def clean_sub_depts(self, lines):
        """Strip the "DIVISIONS"/"OFFICES" header from a sub-section.

        The header looks like "DIVISIONS -", "DIVISIONS \\" or "DIVISIONS:".
        Whatever follows the punctuation on the header line is kept only if
        it contains at least three spaces.  Empty lines are dropped.  The
        input list is not modified.
        """
        if not lines:
            return []

        header = lines[0]
        if '-' in header:
            split_char = '-'
        elif '\\' in header:
            split_char = '\\'
        else:
            split_char = ':'

        _, found, after = header.partition(split_char)
        first = after.strip() if found else header.strip()

        cleaned = list(lines[1:])
        if first.count(' ') >= 3:
            cleaned.insert(0, first)
        return [line for line in cleaned if line != '']

    @staticmethod
    def _split_unit_line(line):
        """Split "Unit <sep> person text" into ``(unit, person)``.

        The separator is the first "-", else the first backslash, else the
        first comma.  Only the first occurrence splits the line, so hyphens
        inside the person text are preserved.  Without any separator the
        whole line is the unit and the person text is empty.
        """
        for sep in ('-', '\\', ','):
            if sep in line:
                unit, _, person = line.partition(sep)
                return unit.strip(), person.strip()
        return line.strip(), ''

    def create_offices(self, lines, dept_name):
        """Parse an OFFICES section.

        Returns:
            tuple: ``(offices, people, leftovers)`` where ``leftovers`` are
            the lines from the DIVISIONS header onwards, or an empty list
            when the section has no DIVISIONS header.
        """
        offices, people, leftovers = [], [], []
        lines = self.clean_sub_depts(lines)
        for i, line in enumerate(lines):
            if Record.contains_with_spaces('DIVISIONS', line):
                leftovers = lines[i:]
                break
            office, person = self._split_unit_line(line)
            offices.append(office)
            people.append(
                Person.get_people(person,
                                  department=dept_name,
                                  division=office))
        return offices, people, leftovers

    def create_divisions(self, lines, dept_name):
        """Parse a DIVISIONS section into ``(divisions, people)``."""
        divs, people = [], []
        for line in self.clean_sub_depts(lines):
            division, person = self._split_unit_line(line)
            divs.append(division)
            people.append(
                Person.get_people(person,
                                  department=dept_name,
                                  division=division))
        return divs, people

    @staticmethod
    def contains_with_spaces(keyword, line):
        """Return whether ``keyword`` occurs in ``line`` ignoring spaces."""
        return keyword in line.replace(' ', '')

    def get_prop(self, name):
        """Resolve a property through the rules for this record's year.

        With a single rule its result is returned as is.  With several, the
        first result not in ``INVALID_VALS`` wins; "Not found" is returned
        when every rule yields an invalid value.
        """
        rules = self.rules[name][self.year]
        # NOTE(security): each rule is evaluated with eval() as an attribute
        # expression on the record file.  Rules must come from trusted
        # project configuration only, never from external input.
        if len(rules) == 1:
            return eval('self.f.' + rules[0])
        for rule in rules:
            attempt = eval('self.f.' + rule)
            if attempt not in INVALID_VALS:
                return attempt
        return "Not found"

    def __repr__(self):
        return "<Record, date=" + self.date + ">"