Example #1
0
def get_headers_with_cols(cols_to_include):
    """Return the headers whose positional index is in ``cols_to_include``.

    The header list is read from ``gd.get_headers()`` and the result keeps
    the headers in their original order.

    Args:
        cols_to_include: container of integer column positions; it is only
            used for membership tests (``in``).

    Returns:
        A new list with the selected header values.
    """
    return [
        name
        for position, name in enumerate(gd.get_headers())
        if position in cols_to_include
    ]
def change_to_list_of_lists(list_of_dicts):
    """Turn ``list_of_dicts`` into one list of column values per row.

    For every header returned by ``gd.get_headers()`` the matching column
    is fetched with ``gd.get_data_slice`` and its values are distributed
    over the rows, so each inner list is ordered like the headers.

    Args:
        list_of_dicts: the row collection; its length fixes the number of
            rows in the result and it is passed on to ``gd.get_data_slice``.

    Returns:
        A list with ``len(list_of_dicts)`` inner lists.
    """
    header_names = gd.get_headers()
    rows = [[] for _ in range(len(list_of_dicts))]

    for name in header_names:
        column = gd.get_data_slice(name, list_of_dicts)
        # Index into the column (rather than zipping) so that a column
        # shorter than the row count still raises IndexError.
        for row_index, row in enumerate(rows):
            row.append(column[row_index])

    return rows
def normalize_data():
    """Collect the standard deviation and mean of every column.

    Despite the name, no values are rescaled here: the function only
    gathers the two statistics for each header.

    Each column is fetched with ``gd.get_data_slice``, passed through
    ``convert_colFloat`` and then handed to ``np.std`` and ``np.mean``.

    Returns:
        A list with one ``[std, mean]`` pair per header, in header order.
    """
    list_of_dicts = gd.get_data_list_of_dicts()
    stats = []

    for name in gd.get_headers():
        values = convert_colFloat(gd.get_data_slice(name, list_of_dicts))
        stats.append([np.std(values), np.mean(values)])

    return stats
def cleaned_headers():
    """Return the headers with spaces, parentheses, ``/`` and ``^`` removed.

    The headers come from ``gd.get_headers()``. Every cleaned header is
    also written to stdout as ``entry: <cleaned header>``.

    Returns:
        A list of cleaned header strings, in the original header order.
    """
    dropped = (" ", "(", ")", "/", "^")
    result = []

    for entry in gd.get_headers():
        stripped = "".join(ch for ch in entry if ch not in dropped)
        # Single-argument print(...) behaves the same as the print statement.
        print("entry: " + stripped)
        result.append(stripped)

    return result
Example #5
0
def remove_dup_cols():
    """Look for pairs of columns that are almost perfectly correlated.

    Every ordered pair (x, y) among the first ``len(headers) - 15`` columns
    is compared; a pair is recorded in ``cols_2remove`` when its ``r`` value
    lies in [0.99, 1.002], x differs from y, and ``exist_same_pair`` reports
    that the pair has not been recorded yet.

    NOTE(review): no return statement or further use of ``cols_2remove`` is
    visible; the function looks cut off after the nested loop -- confirm
    against the full source.
    """
    list_of_dicts = gd.get_data_list_of_dicts()
    headers = gd.get_headers()
     
    # Each record is [y, x, r[0], headers[x], headers[y]].
    cols_2remove = []
    # NOTE(review): the last 15 headers are skipped; the reason is not
    # visible here -- presumably they are not data columns, confirm.
    for x in range(len(headers)-15):
	col1 = gd.get_data_slice(headers[x],list_of_dicts)
	col1 = replace_missing(col1)
	for y in range(len(headers)-15):
	    col2 = gd.get_data_slice(headers[y],list_of_dicts)	 
	    col2 = replace_missing(col2)
	    # r is compared against floats and indexed with [0] below, so
	    # find_r presumably returns a one-element array-like -- verify.
	    r = find_r(col1,col2)
	    pairIn = exist_same_pair(cols_2remove, x, y)
	    # The upper bound 1.002 leaves a little slack above 1.
	    if r>=.99 and r<=1.002 and not x ==y and not pairIn:
		    cols_2remove.append([y,x, r[0], headers[x], headers[y]])
Example #6
0
def clean_headers(headers):
    """Return the given headers with every space character removed.

    Only the plain space (``" "``) is dropped; tabs and any other
    characters are kept. Each cleaned header is written to stdout as
    ``entry: <cleaned header>`` and the complete result list is printed
    once before returning.

    Args:
        headers: iterable of header strings.

    Returns:
        A new list with one cleaned string per input header, in input
        order. An empty input yields an empty list.
    """
    new_headers = []

    for entry in headers:
        final = "".join(ch for ch in entry if ch != " ")
        # Single-argument print(...) works under both Python 2 and 3.
        print("entry: " + final)
        new_headers.append(final)

    print(new_headers)
    return new_headers
Example #7
0
def create_rows_list_of_lists_without_cols(list_of_dicts, cols_to_exclude):
    """Build row lists from every column whose index is not excluded.

    For each kept header the raw column is read with ``gd.get_data_slice``,
    ``average_available_values`` is computed from it, and the column is
    fetched again through ``gd.get_data_slice_replace`` with that average
    (presumably substituting it for missing entries -- confirm against
    ``get_data``). The resulting values are appended to the rows in order.

    Args:
        list_of_dicts: the row collection; its length fixes the number of
            rows in the result.
        cols_to_exclude: container of integer column positions to skip;
            it is only used for membership tests.

    Returns:
        A list with ``len(list_of_dicts)`` inner lists, each holding the
        values of the kept columns in header order.
    """
    header_names = gd.get_headers()
    rows = [[] for _ in range(len(list_of_dicts))]

    for position, name in enumerate(header_names):
        if position in cols_to_exclude:
            continue

        raw_column = gd.get_data_slice(name, list_of_dicts)
        fill_value = average_available_values(raw_column)
        filled_column = gd.get_data_slice_replace(name, list_of_dicts, fill_value)

        for row_index, value in enumerate(filled_column):
            rows[row_index].append(value)

    return rows
import get_data as gd

# Load the column names and the row dictionaries from the data module.
headers_clean = gd.get_headers()
list_of_dicts = gd.get_data_list_of_dicts()

# Keep a header only when none of its comma-separated pieces is exactly
# " Error" or the empty string.
new_headers = []
for h in headers_clean:
    pieces = h.split(",")
    if " Error" in pieces or "" in pieces:
        continue
    new_headers.append(",".join(pieces))

# Project every row onto the surviving headers.
final = [
    dict((header, entry[header]) for header in new_headers)
    for entry in list_of_dicts
]

filename = "noError.csv"
headers = new_headers
gd.write_data_dicts(filename, headers, final)
import get_data as gd

# Keep only the rows whose "Transcript" value is not the empty string and
# write them out with the unchanged header list.
list_of_dicts = gd.get_data_list_of_dicts()
final = [entry for entry in list_of_dicts if entry["Transcript"] != ""]

filename = "justTranscripts.csv"
headers = gd.get_headers()
gd.write_data_dicts(filename, headers, final)
	
import get_data as gd
import get_data2 as gd2
import get_data3 as gd3

# Load the row data and the header lists from the three data modules.
list_of_dicts = gd.get_data_list_of_dicts()
full = gd2.get_data_list_of_dicts()
full_headers = gd2.get_headers()
headers = gd.get_headers()
headers_income = gd3.get_headers()

codes = {}  # maps the part before " - " to the part after it
full_clean = []
final_headers = []

# Split each header on " - " and record the first two pieces as code/name.
for h in headers:
    parts = h.split(" - ")
    code = parts[0]
    try:
        name = parts[1]
    except IndexError:
        # The header has no " - " separator: report the pieces and move on.
        # Only the missing second piece is caught, so unrelated errors are
        # no longer silently swallowed.
        print(parts)
    else:
        codes[code] = name
for h2 in headers_income:
    h2 = h2.split(" - ")
    code = h2[0]
    try:
        if not "Error" in h2[1]:
            name = h2[1]
            codes[code] = name
    except:
import get_data as gd

# Read the header list and the rows that will be filtered.
headers_clean = gd.get_headers()
list_of_dicts = gd.get_data_list_of_dicts()

# A header survives when splitting it on "," yields neither a " Error"
# piece nor an empty piece; re-joining the pieces restores the header text.
new_headers = [
    ",".join(fields)
    for fields in (h.split(",") for h in headers_clean)
    if not (" Error" in fields or "" in fields)
]

# Copy only the surviving columns out of each row.
final = []
for entry in list_of_dicts:
    final.append(dict((header, entry[header]) for header in new_headers))

filename = "noError.csv"
headers = new_headers
gd.write_data_dicts(filename, headers, final)