def names_parse(self):
    """Parse every workbook in ``self.excel_files`` and write one CSV file.

    The first sheet of each workbook is scanned top to bottom, and the text
    in column 0 decides how each row is treated:

    * a cell containing ``property:`` records the current property name;
    * a cell found in ``self.headers`` marks the header row, from which the
      name / event date / source / status column indexes are located;
    * a cell containing both ``grand`` and ``total`` stops data collection
      for the rest of that sheet;
    * any other row after the header is a data row, and a first cell that
      matches one of ``self.section_titles`` (ignoring spaces) becomes the
      ``Action`` recorded for the rows that follow.

    Rows with a non-empty name are written to ``self.output`` as CSV,
    preceded by a UTF-8 BOM.  With ``self.args.raw`` the columns come from
    ``self.header_fields_raw``; otherwise ``First Name`` / ``Last Name`` are
    added from ``eagle.namer`` and, when ``self.args.filter`` is set, only
    rows whose status contains ``resident`` are written.
    """
    # Compared against True exactly as before, so a non-boolean flag value
    # keeps its previous meaning.
    raw_mode = self.args.raw == True
    # Loop-invariant: section titles with spaces removed, used for matching.
    squeezed_titles = [title.replace(" ", "") for title in self.section_titles]

    people = []
    count = 0
    for excel_file in self.excel_files:
        print(excel_file)
        count += 1
        print(count)
        workbook = xlrd.open_workbook(excel_file)
        worksheet = workbook.sheet_by_index(0)
        name_index = 0
        event_date_index = 0
        source_index = 0
        status_index = 0
        start = False
        end = False
        prop_name_cell = ""
        # Reset per workbook: a data row that precedes any section title now
        # gets an empty Action instead of raising UnboundLocalError or
        # inheriting the last section of the previous workbook.
        section_title_cell = ""
        for row_index in range(worksheet.nrows):
            first_value = worksheet.cell(row_index, 0).value
            # Non-text cells (e.g. numbers) have no strip(); they can never
            # be a marker row, so treat them as empty text.
            if hasattr(first_value, 'strip'):
                cell = first_value.strip().replace('\n', '')
            else:
                cell = ""
            lowered = cell.lower()

            if 'property:' in lowered:
                prop_name_cell = first_value
            elif cell in self.headers:
                start = True
                header_row = worksheet.row_values(row_index, 0)
                # Find the column index of the different header row names.
                for i, c in enumerate(header_row):
                    if not hasattr(c, 'lower'):
                        continue  # non-text header cell cannot name a column
                    label = c.lower()
                    if 'name' in label and 'agent' not in label:
                        name_index = i
                    elif 'event' in label and 'date' in label:
                        event_date_index = i
                    elif 'source' in label:
                        source_index = i
                    elif 'status' in label:
                        status_index = i
            # The row with cell "Grand Total" is used as the flag to signal
            # the end of name collecting.
            elif 'grand' in lowered and 'total' in lowered:
                end = True
            elif end:
                continue
            # Begin collecting names after getting past the header row.
            elif start:
                if cell.replace(" ", "") in squeezed_titles:
                    section_title_cell = cell
                name_cell = worksheet.cell(row_index, name_index).value
                name_cell = name_cell.encode('utf-8')
                if name_cell == "":
                    continue
                event_date_cell = worksheet.cell(row_index, event_date_index).value
                source_cell = worksheet.cell(row_index, source_index).value
                status_cell = worksheet.cell(row_index, status_index).value

                # Lookup key: property name without "(digits)" groups,
                # lower-cased, with runs of whitespace collapsed.
                prop_key = re.sub(r"\(\d*\)?", "", prop_name_cell).strip().lower()
                prop_key = " ".join(prop_key.split())
                person = {
                    'Property ID': self.keys.get(prop_key, ''),
                    'Property_Name': prop_name_cell,
                    'Name': name_cell,
                    'Source': source_cell,
                    'Status': status_cell,
                    'Event Date': event_date_cell,
                    'Action': section_title_cell,
                }
                if not raw_mode:
                    # eagle.namer presumably splits the full name; only its
                    # first two items are used here.
                    processed_name = eagle.namer(name_cell)
                    person['First Name'] = processed_name[0].encode('utf-8')
                    person['Last Name'] = processed_name[1].encode('utf-8')
                people.append(person)

    with open(self.output, 'wb') as out_file:
        # BOM for Excel to open the UTF-8 file properly.
        out_file.write(u'\ufeff'.encode('utf8'))
        fieldnames = self.header_fields_raw if raw_mode else self.header_fields
        writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        writer.writeheader()
        # The resident filter only applies to the processed (non-raw) output.
        resident_only = (not raw_mode) and self.args.filter == True
        for person in people:
            if resident_only and 'resident' not in person['Status'].lower():
                continue
            writer.writerow(person)
def names_parse(self):
    """Parse every workbook in ``self.excel_files`` and write one CSV file.

    The first sheet of each workbook is scanned row by row, keyed on the
    text in column 0: a ``property:`` cell records the property name, a cell
    found in ``self.headers`` marks the header row (used to locate the name,
    event date, source and status columns), a cell containing ``grand`` and
    ``total`` stops data collection for that sheet, and a cell found in
    ``self.section_titles`` becomes the ``Action`` of the rows that follow.

    Rows with a non-empty name are written to ``self.output``.  With
    ``self.args.raw`` the columns come from ``self.header_fields_raw``;
    otherwise ``First Name`` / ``Last Name`` are added from ``eagle.namer``
    and ``self.header_fields`` is used.
    """
    # Compared against True exactly as before, so a non-boolean flag value
    # keeps its previous meaning.
    raw_mode = self.args.raw == True

    people = []
    for excel_file in self.excel_files:
        workbook = xlrd.open_workbook(excel_file)
        worksheet = workbook.sheet_by_index(0)
        name_index = 0
        event_date_index = 0
        source_index = 0
        status_index = 0
        start = False
        end = False
        prop_name_cell = ""
        # Reset per workbook: a data row seen before any section title gets
        # an empty Action instead of raising UnboundLocalError or reusing
        # the previous workbook's last section.
        section_title_cell = ""
        for row_index in range(worksheet.nrows):
            first_value = worksheet.cell(row_index, 0).value
            # Non-text cells (e.g. numbers) have no strip(); they can never
            # be a marker row, so treat them as empty text.
            if hasattr(first_value, 'strip'):
                cell = first_value.strip().replace('\n', '')
            else:
                cell = ""
            lowered = cell.lower()

            if 'property:' in lowered:
                prop_name_cell = first_value
            elif cell in self.headers:
                start = True
                header_row = worksheet.row_values(row_index, 0)
                # Find the column index of the different header row names.
                for i, c in enumerate(header_row):
                    if not hasattr(c, 'lower'):
                        continue  # non-text header cell cannot name a column
                    label = c.lower()
                    if 'name' in label and 'agent' not in label:
                        # Debug trace of the detected name column.
                        print("%s %s" % (c, i))
                        name_index = i
                    elif 'event' in label and 'date' in label:
                        event_date_index = i
                    elif 'source' in label:
                        source_index = i
                    elif 'status' in label:
                        status_index = i
            # The row with cell "Grand Total" is used as the flag to signal
            # the end of name collecting.
            elif 'grand' in lowered and 'total' in lowered:
                end = True
            elif end:
                continue
            # Begin collecting names after getting past the header row.
            elif start:
                if cell in self.section_titles:
                    section_title_cell = cell
                name_cell = worksheet.cell(row_index, name_index).value
                if name_cell == "":
                    continue
                event_date_cell = worksheet.cell(row_index, event_date_index).value
                source_cell = worksheet.cell(row_index, source_index).value
                status_cell = worksheet.cell(row_index, status_index).value

                # Lookup key: property name without "(digits)" groups,
                # lower-cased, with runs of whitespace collapsed.
                prop_key = re.sub(r"\(\d*\)?", "", prop_name_cell).strip().lower()
                prop_key = " ".join(prop_key.split())
                person = {
                    'Property ID': self.keys.get(prop_key, ''),
                    'Property_Name': prop_name_cell,
                    'Name': name_cell,
                    'Source': source_cell,
                    'Status': status_cell,
                    'Event Date': event_date_cell,
                    'Action': section_title_cell,
                }
                if not raw_mode:
                    # eagle.namer presumably splits the full name; only its
                    # first two items are used here.
                    processed_name = eagle.namer(name_cell)
                    person['First Name'] = processed_name[0]
                    person['Last Name'] = processed_name[1]
                people.append(person)

    with open(self.output, 'wb') as out_file:
        fieldnames = self.header_fields_raw if raw_mode else self.header_fields
        writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        writer.writeheader()
        for person in people:
            writer.writerow(person)
def names_parse(self):
    """Parse every workbook in ``self.excel_files`` and write one CSV file.

    The property name is read from row 1, column 0 of the first sheet.  The
    sheet is then scanned row by row, keyed on the text in column 0: a cell
    found in ``self.headers`` marks the header row (used to locate the
    prospect name, event date, source, status, event type, home phone and
    office phone columns), and a cell containing both ``event`` and
    ``summary`` stops data collection for that sheet.  Every other row after
    the header is a data row and is kept when both its name and source cells
    are non-empty.

    Rows are written to ``self.output`` as CSV, preceded by a UTF-8 BOM.
    With ``self.args.raw`` the columns come from ``self.header_fields_raw``;
    otherwise ``First Name`` / ``Last Name`` are added from ``eagle.namer``
    and, when ``self.args.filter`` is set, only rows whose status contains
    ``resident`` are written.
    """
    # Compared against True exactly as before, so a non-boolean flag value
    # keeps its previous meaning.
    raw_mode = self.args.raw == True

    people = []
    count = 0
    for excel_file in self.excel_files:
        print(excel_file)
        count += 1
        print(count)
        workbook = xlrd.open_workbook(excel_file)
        worksheet = workbook.sheet_by_index(0)
        name_index = 0
        event_date_index = 0
        source_index = 0
        status_index = 0
        event_type_index = 0
        home_phone_index = 0
        office_phone_index = 0
        start = False
        end = False
        # Sheets with fewer than two rows have no property-name cell; fall
        # back to an empty name instead of raising IndexError.
        prop_name_cell = worksheet.cell(1, 0).value if worksheet.nrows > 1 else ""
        for row_index in range(worksheet.nrows):
            first_value = worksheet.cell(row_index, 0).value
            # Non-text cells (e.g. numbers) have no strip(); they can never
            # be a marker row, so treat them as empty text.
            if hasattr(first_value, "strip"):
                cell = first_value.strip().replace("\n", "")
            else:
                cell = ""
            lowered = cell.lower()

            if cell in self.headers:
                start = True
                header_row = worksheet.row_values(row_index, 0)
                # Find the column index of the different header row names.
                # Each word is tested explicitly: `"a" and "b" in s` only
                # checks "b", because a non-empty literal is always truthy.
                for i, c in enumerate(header_row):
                    if not hasattr(c, "lower"):
                        continue  # non-text header cell cannot name a column
                    label = c.lower()
                    if "prospect" in label and "name" in label:
                        name_index = i
                    elif "event" in label and "date" in label:
                        event_date_index = i
                    elif "source" in label:
                        source_index = i
                    elif "status" in label:
                        status_index = i
                    elif "event" in label and "type" in label:
                        event_type_index = i
                    elif "home" in label and "phone" in label:
                        home_phone_index = i
                    elif "office" in label and "phone" in label:
                        office_phone_index = i
            elif "event" in lowered and "summary" in lowered:
                # End-of-data marker row.
                end = True
            elif end:
                continue
            elif start:
                name_cell = worksheet.cell(row_index, name_index).value
                name_cell = name_cell.encode("utf-8")
                source_cell = worksheet.cell(row_index, source_index).value
                if name_cell == "" or source_cell == "":
                    continue
                event_date_cell = worksheet.cell(row_index, event_date_index).value
                status_cell = worksheet.cell(row_index, status_index).value
                event_type_cell = worksheet.cell(row_index, event_type_index).value
                home_phone_cell = worksheet.cell(row_index, home_phone_index).value
                office_phone_cell = worksheet.cell(row_index, office_phone_index).value

                # TODO: need to add prop_name_cell to prop_key conversion;
                # for now the property name is used as the key unchanged.
                prop_key = prop_name_cell
                person = {
                    "Property ID": self.keys.get(prop_key, ""),
                    "Property_Name": prop_name_cell,
                    "Name": name_cell,
                    "Source": source_cell,
                    "Status": status_cell,
                    "Event Date": event_date_cell,
                    "Event Type": event_type_cell,
                    "Home Phone": home_phone_cell,
                    "Office Phone": office_phone_cell,
                }
                if not raw_mode:
                    # eagle.namer presumably splits the full name; only its
                    # first two items are used here.
                    processed_name = eagle.namer(name_cell)
                    person["First Name"] = processed_name[0].encode("utf-8")
                    person["Last Name"] = processed_name[1].encode("utf-8")
                people.append(person)

    with open(self.output, "wb") as out_file:
        # BOM for Excel to open the UTF-8 file properly.
        out_file.write(u"\ufeff".encode("utf8"))
        fieldnames = self.header_fields_raw if raw_mode else self.header_fields
        writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        writer.writeheader()
        # The resident filter only applies to the processed (non-raw) output.
        resident_only = (not raw_mode) and self.args.filter == True
        for person in people:
            if resident_only and "resident" not in person["Status"].lower():
                continue
            writer.writerow(person)
def names_parse(self):
    """Parse every workbook in ``self.excel_files`` and write one CSV file.

    The first sheet of each workbook is scanned row by row, keyed on the
    text in column 0: a ``property:`` cell records the property name, a cell
    found in ``self.headers`` marks the header row (used to locate the name,
    event date, source and status columns), a cell containing ``grand`` and
    ``total`` stops data collection for that sheet, and a cell found in
    ``self.section_titles`` becomes the ``Action`` of the rows that follow.

    Rows with a non-empty name are written to ``self.output``.  With
    ``self.args.raw`` the columns come from ``self.header_fields_raw``;
    otherwise ``First Name`` / ``Last Name`` are added from ``eagle.namer``
    and, when ``self.args.filter`` is set, only rows whose status contains
    ``resident`` are written.
    """
    # Compared against True exactly as before, so a non-boolean flag value
    # keeps its previous meaning.
    raw_mode = self.args.raw == True

    people = []
    for excel_file in self.excel_files:
        workbook = xlrd.open_workbook(excel_file)
        worksheet = workbook.sheet_by_index(0)
        name_index = 0
        event_date_index = 0
        source_index = 0
        status_index = 0
        start = False
        end = False
        prop_name_cell = ""
        # Reset per workbook: a data row seen before any section title gets
        # an empty Action instead of raising UnboundLocalError or reusing
        # the previous workbook's last section.
        section_title_cell = ""
        for row_index in range(worksheet.nrows):
            first_value = worksheet.cell(row_index, 0).value
            # Non-text cells (e.g. numbers) have no strip(); they can never
            # be a marker row, so treat them as empty text.
            if hasattr(first_value, "strip"):
                cell = first_value.strip().replace("\n", "")
            else:
                cell = ""
            lowered = cell.lower()

            if "property:" in lowered:
                prop_name_cell = first_value
            elif cell in self.headers:
                start = True
                header_row = worksheet.row_values(row_index, 0)
                # Find the column index of the different header row names.
                for i, c in enumerate(header_row):
                    if not hasattr(c, "lower"):
                        continue  # non-text header cell cannot name a column
                    label = c.lower()
                    if "name" in label and "agent" not in label:
                        # Debug trace of the detected name column.
                        print("%s %s" % (c, i))
                        name_index = i
                    elif "event" in label and "date" in label:
                        event_date_index = i
                    elif "source" in label:
                        source_index = i
                    elif "status" in label:
                        status_index = i
            # The row with cell "Grand Total" is used as the flag to signal
            # the end of name collecting.
            elif "grand" in lowered and "total" in lowered:
                end = True
            elif end:
                continue
            # Begin collecting names after getting past the header row.
            elif start:
                if cell in self.section_titles:
                    section_title_cell = cell
                name_cell = worksheet.cell(row_index, name_index).value
                if name_cell == "":
                    continue
                event_date_cell = worksheet.cell(row_index, event_date_index).value
                source_cell = worksheet.cell(row_index, source_index).value
                status_cell = worksheet.cell(row_index, status_index).value

                # Lookup key: property name without "(digits)" groups,
                # lower-cased, with runs of whitespace collapsed.
                prop_key = re.sub(r"\(\d*\)?", "", prop_name_cell).strip().lower()
                prop_key = " ".join(prop_key.split())
                person = {
                    "Property ID": self.keys.get(prop_key, ""),
                    "Property_Name": prop_name_cell,
                    "Name": name_cell,
                    "Source": source_cell,
                    "Status": status_cell,
                    "Event Date": event_date_cell,
                    "Action": section_title_cell,
                }
                if not raw_mode:
                    # eagle.namer presumably splits the full name; only its
                    # first two items are used here.
                    processed_name = eagle.namer(name_cell)
                    person["First Name"] = processed_name[0]
                    person["Last Name"] = processed_name[1]
                people.append(person)

    with open(self.output, "wb") as out_file:
        fieldnames = self.header_fields_raw if raw_mode else self.header_fields
        writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        writer.writeheader()
        # The resident filter only applies to the processed (non-raw) output.
        resident_only = (not raw_mode) and self.args.filter == True
        for person in people:
            if resident_only and "resident" not in person["Status"].lower():
                continue
            writer.writerow(person)
def names_parse(self):
    """Parse every workbook in ``self.excel_files`` and write one CSV file.

    The first sheet of each workbook is scanned row by row, keyed on the
    text in column 0: a ``property:`` cell records the property name, a cell
    found in ``self.headers`` marks the header row (used to locate the name,
    event date and source columns), a cell containing ``grand`` and
    ``total`` stops data collection for that sheet, and a cell found in
    ``self.section_titles`` becomes the ``Action`` of the rows that follow.

    Rows with a non-empty name are written to ``self.output``.  With
    ``self.args.raw`` the columns come from ``self.header_fields_raw``;
    otherwise ``First Name`` / ``Last Name`` are added from ``eagle.namer``
    and ``self.header_fields`` is used.
    """
    # Compared against True exactly as before, so a non-boolean flag value
    # keeps its previous meaning.
    raw_mode = self.args.raw == True

    people = []
    for excel_file in self.excel_files:
        workbook = xlrd.open_workbook(excel_file)
        worksheet = workbook.sheet_by_index(0)
        name_index = 0
        event_date_index = 0
        source_index = 0
        start = False
        end = False
        prop_name_cell = ""
        # Reset per workbook: a data row seen before any section title gets
        # an empty Action instead of raising UnboundLocalError or reusing
        # the previous workbook's last section.
        section_title_cell = ""
        for row_index in range(worksheet.nrows):
            first_value = worksheet.cell(row_index, 0).value
            # Non-text cells (e.g. numbers) have no strip(); they can never
            # be a marker row, so treat them as empty text.
            if hasattr(first_value, 'strip'):
                cell = first_value.strip().replace('\n', '')
            else:
                cell = ""
            lowered = cell.lower()

            if 'property:' in lowered:
                prop_name_cell = first_value
            elif cell in self.headers:
                start = True
                header_row = worksheet.row_values(row_index, 0)
                # Find the column index of the different header row names.
                for i, c in enumerate(header_row):
                    if not hasattr(c, 'lower'):
                        continue  # non-text header cell cannot name a column
                    label = c.lower()
                    if 'name' in label and 'agent' not in label:
                        # Debug trace of the detected name column.
                        print("%s %s" % (c, i))
                        name_index = i
                    elif 'event' in label and 'date' in label:
                        event_date_index = i
                    elif 'source' in label:
                        source_index = i
            # The row with cell "Grand Total" is used as the flag to signal
            # the end of name collecting.
            elif 'grand' in lowered and 'total' in lowered:
                end = True
            elif end:
                continue
            # Begin collecting names after getting past the header row.
            elif start:
                if cell in self.section_titles:
                    section_title_cell = cell
                name_cell = worksheet.cell(row_index, name_index).value
                if name_cell == "":
                    continue
                event_date_cell = worksheet.cell(row_index, event_date_index).value
                source_cell = worksheet.cell(row_index, source_index).value

                person = {
                    'Property_Name': prop_name_cell,
                    'Name': name_cell,
                    'Source': source_cell,
                    'Event Date': event_date_cell,
                    'Action': section_title_cell,
                }
                if not raw_mode:
                    # eagle.namer presumably splits the full name; only its
                    # first two items are used here.
                    processed_name = eagle.namer(name_cell)
                    person['First Name'] = processed_name[0]
                    person['Last Name'] = processed_name[1]
                people.append(person)

    with open(self.output, 'wb') as out_file:
        fieldnames = self.header_fields_raw if raw_mode else self.header_fields
        writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        writer.writeheader()
        for person in people:
            writer.writerow(person)
def names_parse(self):
    """Parse every workbook in ``self.excel_files`` and write one CSV file.

    The property name is read from row 1, column 0 of the first sheet.  The
    sheet is then scanned row by row, keyed on the text in column 0: a cell
    found in ``self.headers`` marks the header row (used to locate the
    prospect name, event date, source, status, event type, home phone and
    office phone columns), and a cell containing both ``event`` and
    ``summary`` stops data collection for that sheet.  Every other row after
    the header is a data row and is kept when both its name and source cells
    are non-empty.

    Rows are written to ``self.output`` as CSV, preceded by a UTF-8 BOM.
    With ``self.args.raw`` the columns come from ``self.header_fields_raw``;
    otherwise ``First Name`` / ``Last Name`` are added from ``eagle.namer``
    and, when ``self.args.filter`` is set, only rows whose status contains
    ``resident`` are written.
    """
    # Compared against True exactly as before, so a non-boolean flag value
    # keeps its previous meaning.
    raw_mode = self.args.raw == True

    people = []
    count = 0
    for excel_file in self.excel_files:
        print(excel_file)
        count += 1
        print(count)
        workbook = xlrd.open_workbook(excel_file)
        worksheet = workbook.sheet_by_index(0)
        name_index = 0
        event_date_index = 0
        source_index = 0
        status_index = 0
        event_type_index = 0
        home_phone_index = 0
        office_phone_index = 0
        start = False
        end = False
        # Sheets with fewer than two rows have no property-name cell; fall
        # back to an empty name instead of raising IndexError.
        prop_name_cell = worksheet.cell(1, 0).value if worksheet.nrows > 1 else ""
        for row_index in range(worksheet.nrows):
            first_value = worksheet.cell(row_index, 0).value
            # Non-text cells (e.g. numbers) have no strip(); they can never
            # be a marker row, so treat them as empty text.
            if hasattr(first_value, 'strip'):
                cell = first_value.strip().replace('\n', '')
            else:
                cell = ""
            lowered = cell.lower()

            if cell in self.headers:
                start = True
                header_row = worksheet.row_values(row_index, 0)
                # Find the column index of the different header row names.
                # Each word is tested explicitly: `'a' and 'b' in s` only
                # checks 'b', because a non-empty literal is always truthy.
                for i, c in enumerate(header_row):
                    if not hasattr(c, 'lower'):
                        continue  # non-text header cell cannot name a column
                    label = c.lower()
                    if 'prospect' in label and 'name' in label:
                        name_index = i
                    elif 'event' in label and 'date' in label:
                        event_date_index = i
                    elif 'source' in label:
                        source_index = i
                    elif 'status' in label:
                        status_index = i
                    elif 'event' in label and 'type' in label:
                        event_type_index = i
                    elif 'home' in label and 'phone' in label:
                        home_phone_index = i
                    elif 'office' in label and 'phone' in label:
                        office_phone_index = i
            elif 'event' in lowered and 'summary' in lowered:
                # End-of-data marker row.
                end = True
            elif end:
                continue
            elif start:
                name_cell = worksheet.cell(row_index, name_index).value
                name_cell = name_cell.encode('utf-8')
                source_cell = worksheet.cell(row_index, source_index).value
                if name_cell == "" or source_cell == "":
                    continue
                event_date_cell = worksheet.cell(row_index, event_date_index).value
                status_cell = worksheet.cell(row_index, status_index).value
                event_type_cell = worksheet.cell(row_index, event_type_index).value
                home_phone_cell = worksheet.cell(row_index, home_phone_index).value
                office_phone_cell = worksheet.cell(row_index, office_phone_index).value

                # TODO: need to add prop_name_cell to prop_key conversion;
                # for now the property name is used as the key unchanged.
                prop_key = prop_name_cell
                person = {
                    'Property ID': self.keys.get(prop_key, ''),
                    'Property_Name': prop_name_cell,
                    'Name': name_cell,
                    'Source': source_cell,
                    'Status': status_cell,
                    'Event Date': event_date_cell,
                    'Event Type': event_type_cell,
                    'Home Phone': home_phone_cell,
                    'Office Phone': office_phone_cell,
                }
                if not raw_mode:
                    # eagle.namer presumably splits the full name; only its
                    # first two items are used here.
                    processed_name = eagle.namer(name_cell)
                    person['First Name'] = processed_name[0].encode('utf-8')
                    person['Last Name'] = processed_name[1].encode('utf-8')
                people.append(person)

    with open(self.output, 'wb') as out_file:
        # BOM for Excel to open the UTF-8 file properly.
        out_file.write(u'\ufeff'.encode('utf8'))
        fieldnames = self.header_fields_raw if raw_mode else self.header_fields
        writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        writer.writeheader()
        # The resident filter only applies to the processed (non-raw) output.
        resident_only = (not raw_mode) and self.args.filter == True
        for person in people:
            if resident_only and 'resident' not in person['Status'].lower():
                continue
            writer.writerow(person)