def preprocessOrgs():
    """Build the organization id list from the raw organization list.

    Reads ``CSV Files/reference/raw_org_list.csv``, passes each row's
    ``org_name`` through ``normalizeOrgs`` and writes
    ``CSV Files/org_id_list_new.csv`` with the columns ``org_name`` and
    ``org_id``.  Ids are assigned sequentially, starting at 1, in the order
    the rows appear in the input file.
    """
    # The 'rU' / 'wb' modes are kept unchanged; presumably they are the
    # Python 2 csv conventions this script targets -- confirm before porting.
    # Nested ``with`` blocks guarantee both files are flushed and closed,
    # even if normalizeOrgs raises part-way through.
    with open('CSV Files/reference/raw_org_list.csv', 'rU') as src:
        with open('CSV Files/org_id_list_new.csv', 'wb') as dst:
            orgs = csv.DictReader(src)
            w = csv.writer(dst)
            w.writerow(['org_name', 'org_id'])
            for org_id, o in enumerate(orgs, 1):
                w.writerow([normalizeOrgs(o["org_name"]), org_id])
def preprocessFACA():
    """Clean the yearly FACA member list CSVs and write ``_new`` copies.

    For each year processed, reads ``FACAMemberList<year>.csv`` from the
    ``old`` directory and writes ``FACAMemberList<year>_new.csv`` into the
    ``new`` directory, using the same column names.  For every input row:

    * ``OccupationOrAffiliation`` is replaced by the result of
      ``normalizeOrgs`` applied to it;
    * ``organizeDates`` is called for the ``StartDate`` and ``EndDate``
      fields;
    * ``organizeNames`` is called when both ``FirstName`` and ``MiddleName``
      are empty strings.

    ``organizeDates`` and ``organizeNames`` are defined elsewhere; their
    return values are ignored here, so they presumably update ``row`` in
    place -- verify against their definitions.
    """
    # Loop-invariant locations, hoisted out of the per-year loop.
    oldDir = '/Users/kevinshin92/Research/datafeeds/FACAMemberLists/old/'
    newDir = '/Users/kevinshin92/Research/datafeeds/FACAMemberLists/new/'

    # Only one year (1997) is processed right now; the original "#14" note
    # suggests the range is meant to be widened to 14 years -- TODO confirm.
    for i in range(1):  #14
        date = 1997 + i
        baseName = 'FACAMemberList%(date)d' % {'date': date}

        # ``with`` guarantees both files are closed (and the output flushed)
        # even when one of the row helpers raises.
        with open(oldDir + baseName + '.csv', 'rU') as readFile:
            with open(newDir + baseName + '_new.csv', 'wb') as writeFile:
                r = csv.DictReader(readFile)
                w = csv.DictWriter(writeFile, r.fieldnames)
                # Header row: map every field name onto itself.
                w.writerow(dict((fn, fn) for fn in r.fieldnames))

                for row in r:
                    # Run regular expressions on occupations
                    old = row["OccupationOrAffiliation"]
                    row["OccupationOrAffiliation"] = normalizeOrgs(old)

                    organizeDates(row, "StartDate")
                    organizeDates(row, "EndDate")

                    # Test to see if the first and middle name fields are
                    # empty (Prefix/Suffix can be filled).
                    if row["FirstName"] == '' and row["MiddleName"] == '':
                        organizeNames(row)

                    # NOTE(review): every row is written, not only those
                    # that went through organizeNames -- confirm this matches
                    # the intended behaviour.
                    w.writerow(row)