Esempio n. 1
0
def preprocessOrgs():
	"""Write a CSV of normalized organization names with sequential ids.

	Reads 'CSV Files/reference/raw_org_list.csv' (which must have an
	"org_name" column) and writes 'CSV Files/org_id_list_new.csv' with a
	header row ['org_name', 'org_id']. Each input row yields one output row
	holding normalizeOrgs(org_name) and a 1-based running id in input order.

	Both files are opened in `with` blocks so they are closed (and the
	output flushed) even if a row fails part-way through.
	"""
	# NOTE(review): 'rU' / 'wb' are the Python 2 csv file modes; they are
	# kept unchanged so behavior on the original interpreter is preserved.
	with open('CSV Files/reference/raw_org_list.csv', 'rU') as inFile:
		with open('CSV Files/org_id_list_new.csv', 'wb') as outFile:
			orgs = csv.DictReader(inFile)
			w = csv.writer(outFile)
			w.writerow(['org_name', 'org_id'])
			# enumerate(..., 1) replaces the hand-maintained counter.
			for orgId, o in enumerate(orgs, 1):
				w.writerow([normalizeOrgs(o["org_name"]), orgId])
Esempio n. 2
0
def preprocessFACA():
	"""Rewrite yearly FACA member-list CSVs with cleaned-up fields.

	For each year handled by the loop (currently only 1997), reads
	'<oldDir>FACAMemberList<year>.csv' and writes
	'<newDir>FACAMemberList<year>_new.csv' with the same columns. For every
	row:

	* "OccupationOrAffiliation" is replaced by normalizeOrgs() of its value;
	* organizeDates() is called for "StartDate" and "EndDate";
	* organizeNames() is called when both "FirstName" and "MiddleName" are
	  empty strings.

	organizeDates/organizeNames are assumed to modify `row` in place, since
	their return values are not used here -- confirm against their
	definitions.

	Files are opened in `with` blocks so they are closed (and the output
	flushed) even when processing a row raises.
	"""
	# Loop-invariant, so built once instead of on every iteration.
	oldDir = '/Users/kevinshin92/Research/datafeeds/FACAMemberLists/old/'
	newDir = '/Users/kevinshin92/Research/datafeeds/FACAMemberLists/new/'
	# NOTE(review): the trailing "#14" presumably records the intended number
	# of yearly files; only the first year is processed as written.
	for i in range(1): #14
		date = 1997 + i
		baseName = 'FACAMemberList%(date)d' % {'date':date}
		# NOTE(review): 'rU' / 'wb' are the Python 2 csv file modes; they are
		# kept unchanged so behavior on the original interpreter is preserved.
		with open(oldDir + baseName +'.csv', 'rU') as readFile:
			with open(newDir + baseName + '_new.csv', 'wb') as writeFile:
				r = csv.DictReader(readFile)
				w = csv.DictWriter(writeFile, r.fieldnames)
				# Header row: map every field name to itself.
				w.writerow(dict((fn,fn) for fn in r.fieldnames))
				for row in r:
					old = row["OccupationOrAffiliation"]
					# Run regular expressions on occupations
					row["OccupationOrAffiliation"] = normalizeOrgs(old)
					organizeDates(row, "StartDate")
					organizeDates(row, "EndDate")
					# Only first and middle name are tested; the prefix/suffix
					# fields may still be filled.
					if (row["FirstName"] == '' and row["MiddleName"] == ''):
						organizeNames(row)
					w.writerow(row)