def parse_fidarestat_data():
    """
    Convert fidarestat data into simple text file, with columns
    gene = Ensembl
    C = non-diabetic controls
    D = diabetic untreated
    F = diabetic + fidarestat

    Reads sheet "maxd1antilog2" of 'fidarestat.xls' and writes the
    comma-separated file 'fidarestat.txt'; both live under PATH.

    Only genes returned by om.get_list_of_genes() for the RAMBO_NAME model
    are written, and each gene is written at most once (its first
    occurrence in the spreadsheet wins).  Model genes that never appear in
    the spreadsheet are appended with 0 in every data column.
    """
    book = xlrd.open_workbook(os.path.join(PATH, 'fidarestat.xls'))
    sbml = om.read_sbml(os.path.join(PATH, RAMBO_NAME))
    gene_list = om.get_list_of_genes(sbml)
    sheet = book.sheet_by_name("maxd1antilog2")

    # Sets give O(1) membership tests inside the row loop.  gene_list itself
    # is kept for the final pass so the output order is unchanged.
    # NOTE(review): assumes gene identifiers are hashable (strings) - confirm.
    model_genes = set(gene_list)
    done = set()

    out_path = os.path.join(PATH, 'fidarestat.txt')
    # 'wb' is the mode the Python 2 csv module expects for its output file.
    with open(out_path, 'wb') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(['gene', 'C', 'D', 'F'])

        # Row 0 is skipped (presumably a header row - confirm).
        for row in range(1, sheet.nrows):
            F = strip_string(sheet.cell(row, 3).value)
            C = strip_string(sheet.cell(row, 4).value)
            D = strip_string(sheet.cell(row, 5).value)
            gene = strip_string(sheet.cell(row, 17).value)
            if gene in model_genes and gene not in done:
                done.add(gene)
                writer.writerow([gene, C, D, F])

        # Pad the output so every model gene has exactly one row.
        for gene in gene_list:
            if gene not in done:
                done.add(gene)
                writer.writerow([gene, 0, 0, 0])
def parse_timecourse_data():
    """
    Convert timecourse data into simple text file, with columns
    gene = Ensembl
    C4 = controls after 4 weeks
    D4 = diabetic after 4 weeks
    C8 = controls after 8 weeks
    D8 = diabetic after 8 weeks

    Reads sheet "E-MEXP-515-processed-data-13435" of 'timecourse.xls' and
    writes the comma-separated file 'timecourse.txt'; both live under PATH.

    Only genes returned by om.get_list_of_genes() for the RAMBO_NAME model
    are written, and each gene is written at most once (its first
    occurrence in the spreadsheet wins).  Empty cells are written as 0.
    Model genes that never appear in the spreadsheet are appended with 0 in
    every data column.

    Finally prints the min / mean / max of the non-zero values written.
    """
    book = xlrd.open_workbook(os.path.join(PATH, 'timecourse.xls'))
    sbml = om.read_sbml(os.path.join(PATH, RAMBO_NAME))
    gene_list = om.get_list_of_genes(sbml)
    sheet = book.sheet_by_name("E-MEXP-515-processed-data-13435")

    # Sets give O(1) membership tests inside the row loop.  gene_list itself
    # is kept for the final pass so the output order is unchanged.
    # NOTE(review): assumes gene identifiers are hashable (strings) - confirm.
    model_genes = set(gene_list)
    done = set()
    data = []  # non-zero values written, used for the summary below

    # Spreadsheet columns holding C4, D4, C8 and D8, in output order.
    value_columns = (9, 17, 25, 33)

    out_path = os.path.join(PATH, 'timecourse.txt')
    # 'wb' is the mode the Python 2 csv module expects for its output file.
    with open(out_path, 'wb') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(['gene', 'C4', 'D4', 'C8', 'D8'])

        # Rows 0 and 1 are skipped (presumably header rows - confirm).
        for row in range(2, sheet.nrows):
            # Falsy (e.g. empty) cells are replaced by 0.
            values = [
                strip_string(sheet.cell(row, col).value) or 0
                for col in value_columns
            ]
            gene = strip_string(sheet.cell(row, 1).value)
            if gene in model_genes and gene not in done:
                done.add(gene)
                writer.writerow([gene] + values)
                # NOTE(review): statistics are collected for written rows
                # only; confirm this matches the intended scope.
                for value in values:
                    x = float(value)
                    if x:
                        data.append(x)

        # Pad the output so every model gene has exactly one row.
        for gene in gene_list:
            if gene not in done:
                done.add(gene)
                writer.writerow([gene, 0, 0, 0, 0])

    if data:
        # Single-argument parenthesised print behaves the same on Python 2.
        print('data range:\nmin:\t%g\nmean:\t%g\nmax:\t%g\n'
              % (np.min(data), np.mean(data), np.max(data)))
    else:
        # np.min / np.max raise ValueError on an empty sequence.
        print('data range:\nno non-zero values found\n')
def parse_liver_data():
    """
    Convert liver data into simple text file, with columns
    gene = Ensembl
    C = non-diabetic controls
    D = diabetic untreated
    CT = control + drug
    DT = diabetic + drug
    Csd, Dsd, CTsd, DTsd = standard deviation of C, D, CT, DT

    Reads sheets "Means" and "Stdevs" of 'liver.xls' and writes the
    comma-separated file 'liver.txt'; both live under PATH.  Each mean is
    written immediately followed by its standard deviation.

    Only genes returned by om.get_list_of_genes() for the RAMBO_NAME model
    are written, and each gene is written at most once (its first
    occurrence in the "Means" sheet wins).  Model genes that never appear
    in the spreadsheet are appended with 0 in every data column.
    """
    book = xlrd.open_workbook(os.path.join(PATH, 'liver.xls'))
    sbml = om.read_sbml(os.path.join(PATH, RAMBO_NAME))
    gene_list = om.get_list_of_genes(sbml)
    means = book.sheet_by_name("Means")
    stdevs = book.sheet_by_name("Stdevs")

    # Sets give O(1) membership tests inside the row loop.  gene_list itself
    # is kept for the final pass so the output order is unchanged.
    # NOTE(review): assumes gene identifiers are hashable (strings) - confirm.
    model_genes = set(gene_list)
    done = set()

    out_path = os.path.join(PATH, 'liver.txt')
    # 'wb' is the mode the Python 2 csv module expects for its output file.
    with open(out_path, 'wb') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(['gene', 'C', 'Csd', 'D', 'Dsd',
                         'CT', 'CTsd', 'DT', 'DTsd'])

        # Row 0 is skipped (presumably a header row - confirm).
        # NOTE(review): "Stdevs" is indexed with the row numbers of "Means",
        # so both sheets are assumed to list the same genes in the same
        # order, with "Stdevs" having at least as many rows - confirm.
        for row in range(1, means.nrows):
            gene = strip_string(means.cell(row, 0).value)
            # In both sheets the columns are ordered D, C, DT, CT.
            D = strip_string(means.cell(row, 1).value)
            C = strip_string(means.cell(row, 2).value)
            DT = strip_string(means.cell(row, 3).value)
            CT = strip_string(means.cell(row, 4).value)
            Dsd = strip_string(stdevs.cell(row, 1).value)
            Csd = strip_string(stdevs.cell(row, 2).value)
            DTsd = strip_string(stdevs.cell(row, 3).value)
            CTsd = strip_string(stdevs.cell(row, 4).value)
            if gene in model_genes and gene not in done:
                done.add(gene)
                writer.writerow([gene, C, Csd, D, Dsd, CT, CTsd, DT, DTsd])

        # Pad the output so every model gene has exactly one row.
        for gene in gene_list:
            if gene not in done:
                done.add(gene)
                writer.writerow([gene, 0, 0, 0, 0, 0, 0, 0, 0])