# Merge contact-shelve fields onto CSV rows read from stdin, matching on the
# UFID column, and write the merged rows to stdout.  Match counts go to stderr.

# Column names carried by a contact record, sampled from one arbitrary entry.
# Peek with next(iter(...)) instead of set.pop(): pop() would also remove that
# UFID from contact_ufids, so its row would later be reported as "Not found"
# and have its contact columns blanked.
if contact_ufids:
    contact_names = set(contact_shelve[next(iter(contact_ufids))].keys())
else:
    # Empty shelve: there are no contact columns to add.
    contact_names = set()

data_in = read_csv_fp(sys.stdin)
print >>sys.stderr, len(data_in)

found = 0
not_found = 0
data_out = {}
for row, data in data_in.items():
    new_data = dict(data)  # copy so the input row is left untouched
    if data['UFID'] in contact_ufids:
        found += 1
        # Overlay every contact field onto the copy of the input row.
        new_data.update(contact_shelve[data['UFID']])
        new_data['UF_BUSINESS_FAX'] = improve_phone_number(new_data['UF_BUSINESS_FAX'])
        new_data['UF_BUSINESS_PHONE'] = improve_phone_number(new_data['UF_BUSINESS_PHONE'])
        new_data['DISPLAY_NAME'] = improve_display_name(new_data['DISPLAY_NAME'])
        new_data['WORKINGTITLE'] = improve_jobcode_description(new_data['WORKINGTITLE'])
    else:
        not_found += 1
        # Keep the output columns uniform: unmatched rows get empty values
        # for each contact column.
        for name in contact_names:
            new_data[name] = ''
    data_out[row] = new_data

print >>sys.stderr, 'Found', found
print >>sys.stderr, 'Not found', not_found
write_csv_fp(sys.stdout, data_out)
contact_shelve.close()
# Add these columns new_data['remove'] = '' new_data['uri'] = '' new_data['display_name'] = '' new_data['first'] = '' new_data['last'] = '' new_data['middle'] = '' new_data['suffix'] = '' new_data['corresponding'] = '' new_data['uf'] = '' # Delete everything not in the keep_names set for name in new_data.keys(): if name not in keep_names: del new_data[name] for author in author_data: row_out += 1 data_out[row_out] = dict(new_data) for key in author.keys(): data_out[row_out][key] = author[key] if key == 'display_name': data_out[row_out][key] = improve_display_name(author[key]) column_names_out = data_out[1].keys() print_err("==> {} columns in the output: {}" .format(len(column_names_out), column_names_out)) write_csv_fp(sys.stdout, data_out)
# Add these columns new_data['remove'] = '' new_data['uri'] = '' new_data['display_name'] = '' new_data['first'] = '' new_data['last'] = '' new_data['middle'] = '' new_data['suffix'] = '' new_data['corresponding'] = '' new_data['uf'] = '' # Delete everything not in the keep_names set for name in new_data.keys(): if name not in keep_names: del new_data[name] for author in author_data: row_out += 1 data_out[row_out] = dict(new_data) for key in author.keys(): data_out[row_out][key] = author[key] if key == 'display_name': data_out[row_out][key] = improve_display_name(author[key]) var_names = data_out[ data_out.keys()[1]].keys() # create a list of var_names from the first row print >> sys.stderr, "Columns out", var_names write_csv_fp(sys.stdout, data_out)
def test_standard_case(self):
    # Upper-case "LAST,FIRST" input is expected back as "Last, First".
    formatted = improve_display_name("CONLON,MIKE")
    self.assertEqual("Conlon, Mike", formatted)
def test_no_op(self):
    # A name already in "Last, First" form is expected back unchanged.
    already_formatted = "Conlon, Mike"
    result = improve_display_name(already_formatted)
    self.assertEqual(already_formatted, result)
import sys

# Merge contact records from the "contact.db" shelve onto CSV rows read from
# stdin, matching on the UFID column, and write the merged rows to stdout.
# Match counts are reported on stderr.
contact_shelve = shelve.open("contact.db")
try:
    # UFIDs that have a record in the contact shelve.
    contact_ufids = set(contact_shelve.keys())

    # Column names carried by a contact record, sampled from one arbitrary
    # entry.  Peek with next(iter(...)) instead of set.pop(): pop() would also
    # remove that UFID from contact_ufids, so its row would later be reported
    # as "Not found" and have its contact columns blanked.
    if contact_ufids:
        contact_names = set(contact_shelve[next(iter(contact_ufids))].keys())
    else:
        # Empty shelve: there are no contact columns to add.
        contact_names = set()

    data_in = read_csv_fp(sys.stdin)
    print >>sys.stderr, len(data_in)

    found = 0
    not_found = 0
    data_out = {}
    for row, data in data_in.items():
        new_data = dict(data)  # copy so the input row is left untouched
        if data["UFID"] in contact_ufids:
            found += 1
            # Overlay every contact field onto the copy of the input row.
            new_data.update(contact_shelve[data["UFID"]])
            new_data["UF_BUSINESS_FAX"] = improve_phone_number(new_data["UF_BUSINESS_FAX"])
            new_data["UF_BUSINESS_PHONE"] = improve_phone_number(new_data["UF_BUSINESS_PHONE"])
            new_data["DISPLAY_NAME"] = improve_display_name(new_data["DISPLAY_NAME"])
            new_data["WORKINGTITLE"] = improve_jobcode_description(new_data["WORKINGTITLE"])
        else:
            not_found += 1
            # Keep the output columns uniform: unmatched rows get empty
            # values for each contact column.
            for name in contact_names:
                new_data[name] = ""
        data_out[row] = new_data

    print >>sys.stderr, "Found", found
    print >>sys.stderr, "Not found", not_found
    write_csv_fp(sys.stdout, data_out)
finally:
    # Release the shelve even if reading, merging or writing fails.
    contact_shelve.close()