def fusegroupcolumns(tabdata, group):
    """
    Append one fused grouping column to a table.

    For every row, the values of the columns listed in ``group`` are
    converted with ``str`` and concatenated without a separator. The new
    column is named after the concatenation of the ``group`` entries
    themselves. Having a single column allows grouping on it later in
    ggplot2.

    tabdata -- a fhutils.Table, or a list whose first element is the header
               row and whose remaining elements are the data rows
    group   -- iterable of column names to fuse

    Returns the tuple ``(new_table, fused_column_name)``.
    """
    if isinstance(tabdata, list):
        # Wrap the header + rows list in a Table and continue with its clone.
        wrapped = fhutils.Table()
        wrapped.columnames = tabdata[0]
        wrapped.data = tabdata[1:]
        tabdata = wrapped.iclone()

    fusedname = ''.join(str(part) for part in group)
    header = list(tabdata.columnames) + [fusedname]

    fused = fhutils.Table()
    fused.columnames = header
    for row in tabdata.iterrows():
        fusedvalue = ''.join(str(row[name]) for name in group)
        cells = [row[name] for name in tabdata.columnames]
        cells.append(fusedvalue)
        fused.data.append(cells)
    return fused, fusedname
def pyobj2dataframe(valuelis, columnames=True, rownames=False):
    """
    Build an rpy2 R data frame from a table or a list of rows.

    valuelis   -- an rpy2 DataFrame (returned unchanged), a fhutils.Table,
                  or a list of rows
    columnames -- list input only: when true the first row is taken as the
                  header, otherwise the names 'v0', 'v1', ... are generated
    rownames   -- when true the frame is passed once more through R's
                  ``data.frame`` with ``row_names=1``

    A column becomes a FloatVector when every converted cell is an int or
    a float, and a StrVector otherwise.
    """
    if isinstance(valuelis, robjects.vectors.DataFrame):
        return valuelis

    if isinstance(valuelis, fhutils.Table):
        pytab = fhutils.Table('data')
        assert '-' not in valuelis.columnames, valuelis.columnames
        pytab.columnames = valuelis.columnames
        pytab.data = valuelis.data
    else:
        pytab = fhutils.Table('data')
        if columnames:
            # the header is the first row of the list
            assert '-' not in valuelis[0], valuelis[0]
            pytab.columnames = valuelis[0]
            pytab.data = valuelis[1:]
        else:
            # no header given: generate arbitrary column names
            pytab.columnames = ['v' + str(i) for i in range(len(valuelis[0]))]
            pytab.data = valuelis

    columns = []
    for name in pytab.columnames:
        converted = []
        for cell in pytab.getcolumn(name):
            if isinstance(cell, (list, tuple)):
                # only the first item of a nested cell is used
                cell = cell[0]
            if isinstance(cell, (int, float)) or cell.isdigit():
                converted.append(float(cell))
            elif cell == 'NA':
                converted.append(rinterface.NARealType())
            else:
                # try to convert into most likely type
                converted.append(fhutils.is_numeric(cell))

        if all(isinstance(value, (float, int)) for value in converted):
            vector = robjects.vectors.FloatVector(converted)
        else:
            vector = robjects.vectors.StrVector(converted)
        columns.append((name, vector))

    dataframe = robjects.DataFrame(rlc.OrdDict(columns))
    if rownames:
        makeframe = robjects.r('data.frame')
        dataframe = makeframe(dataframe, row_names=1)
    return dataframe
def dataframe2pyobj(dataframe):
    """
    Convert an rpy2 data frame into a fhutils.Table.

    The table gets a leading 'rownames' column followed by the columns of
    the frame. Factor columns are replaced by their level labels; every
    other column is copied as it is.
    """
    table = fhutils.Table()
    framecolumns = list(dataframe.colnames)
    rowlabels = list(dataframe.rownames)

    bycolumn = {}
    for cname, column in dataframe.items():
        if isinstance(column, robjects.vectors.FactorVector):
            # factor codes are 1-based indexes into the level labels
            levels = tuple(column.levels)
            values = [levels[code - 1] for code in tuple(column)]
        else:
            values = tuple(column)
        bycolumn[cname] = values
    # stored last, so it wins over a frame column that is also called 'rownames'
    bycolumn['rownames'] = rowlabels

    table.columnames = ['rownames'] + framecolumns
    for cname in table.columnames:
        table.data.append(tuple(bycolumn[cname]))
    # data was collected column by column; turn it into rows
    table.data = fhutils.pytranspose(table.data)
    return table
def pyrnparcomp(table, measurevar, groupvar):
    """
    Run the R function ``rnparcomp`` on a table.

    table      -- anything accepted by pyobj2dataframe
    measurevar -- forwarded to R as ``measurevar``
    groupvar   -- forwarded to R as ``groupvar``

    Every row of the converted R result is printed. The returned table is
    named 'nparcomp' and only has its header ('group1', 'group2',
    'p-value') set; no result rows are copied into it.
    """
    rconsole = rpystatinit()
    rnparcomp = rconsole("rnparcomp")
    dataframe = pyobj2dataframe(table)
    # Use the R function looked up above (``rmultcomp`` is not defined in
    # this function and belongs to pyrmultcomp).
    results = rnparcomp(data=dataframe, measurevar=measurevar,
                        groupvar=groupvar)
    results = dataframe2pyobj(results)
    newtab = fhutils.Table('nparcomp')
    newtab.columnames = ['group1', 'group2', 'p-value']
    # NOTE(review): rows are only printed, never appended to newtab --
    # filling the result table looks unfinished; confirm intended output.
    for row in results:
        print(row)
    return newtab
def pyrmultcomp(table, measurevar, groupvar, contrastsvector='none'):
    """
    Run the R function ``rmultcomp`` on a table.

    table           -- anything accepted by pyobj2dataframe
    measurevar      -- forwarded to R as ``measurevar``
    groupvar        -- forwarded to R as ``groupvar``
    contrastsvector -- iterable of (group1, group2) pairs; each pair is
                       turned into the string "group1 - group2 = 0".
                       A plain string (such as the default 'none') is
                       forwarded as a one-element string vector.

    Every row of the converted R result is printed. The returned table is
    named 'multcomp' and only has its header ('group1', 'group2',
    'p-value') set; no result rows are copied into it.
    """
    rconsole = rpystatinit()
    rmultcomp = rconsole("rmultcomp")
    dataframe = pyobj2dataframe(table)
    if isinstance(contrastsvector, str):
        # A string cannot be unpacked into pairs; pass it through as is.
        # NOTE(review): assumes the R side understands the 'none' sentinel
        # as "no custom contrasts" -- confirm against the R definition.
        contrasts = (contrastsvector,)
    else:
        contrasts = tuple(
            "{tu1} - {tu2} = 0".format(tu1=tu1, tu2=tu2)
            for tu1, tu2 in contrastsvector
        )
    contrastsvector = robjects.vectors.StrVector(contrasts)
    results = rmultcomp(data=dataframe, measurevar=measurevar,
                        groupvar=groupvar, contrastsvector=contrastsvector)
    results = dataframe2pyobj(results)
    newtab = fhutils.Table('multcomp')
    newtab.columnames = ['group1', 'group2', 'p-value']
    # NOTE(review): rows are only printed, never appended to newtab --
    # filling the result table looks unfinished; confirm intended output.
    for row in results:
        print(row)
    return newtab
def pykruskalwallis(table, formula, term, filename='kruskalimage', gformat='pdf'):
    """
    Run the R function ``rkruskal`` and return its result in long format.

    table   -- anything accepted by pyobj2dataframe
    formula -- forwarded to R as ``formulastring``
    term    -- forwarded to R as ``term``
    filename, gformat -- accepted but not used by this function

    For each row of the converted result, one output row is written per
    column after the first: (first cell of the row, column name, cell
    value). The output table is named 'kruskal' and has the header
    ('group1', 'group2', 'p-value').
    """
    rconsole = rpystatinit()
    rkruskal = rconsole("rkruskal")
    rframe = pyobj2dataframe(table)
    rresult = rkruskal(data=rframe, formulastring=formula, term=term)
    results = dataframe2pyobj(rresult)

    longtab = fhutils.Table('kruskal')
    longtab.columnames = ['group1', 'group2', 'p-value']
    for row in results:
        first = row[0]
        # column 0 is the row label; the remaining columns hold the values
        for pos in range(1, len(results.columnames)):
            longtab.append((first, results.columnames[pos], row[pos]))
    return longtab
def propagatetable(datatable, groupcolumn, measurecolumn, errorcolumn):
    """
    Primitive aggregate (averaging) function for a table with error values.

    Rows are grouped by the values of ``groupcolumn``; within each group
    the measurements are averaged with ``ufmeanstd`` so that the error is
    propagated through the ufloat mechanism. NA is filtered out by default,
    there is no control over it.

    datatable     -- table providing ``iterrows()``; rows are indexed by
                     column name
    groupcolumn   -- one column name (string) or a sequence of column names
    measurecolumn -- name of the column holding the measured values
    errorcolumn   -- name of the column holding the errors

    Returns a fhutils.Table named 'propagate' with the group columns
    followed by ``measurecolumn`` and ``errorcolumn``, sorted with
    ``sortrow()``.
    """
    # ``unicode`` only exists on Python 2; an unguarded reference raises
    # NameError on Python 3.
    try:
        stringtypes = (str, unicode)
    except NameError:
        stringtypes = (str,)
    if isinstance(groupcolumn, stringtypes):
        groupcolumn = (groupcolumn,)

    keyvalue = []
    for row in datatable.iterrows():
        key = tuple([row[col] for col in groupcolumn])
        if row[measurecolumn] == 'NA':
            # no measurement: the row does not contribute
            continue
        if row[errorcolumn] == 'NA':
            # no error available: keep the bare measurement
            value = row[measurecolumn]
        else:
            value = ufloat(row[measurecolumn], row[errorcolumn])
        keyvalue.append((key, value))
    keyvalue = fhutils.keylis2dic(keyvalue)

    resultable = fhutils.Table('propagate')
    resultable.columnames = list(groupcolumn) + [measurecolumn, errorcolumn]
    for key, valuelist in keyvalue.items():
        valuelist = [ele for ele in valuelist if ele != 'NA']  # NA filtered out
        ufavg = ufmeanstd(valuelist, asufloat=True)
        row = list(key)
        if isinstance(ufavg, uncertainties.UFloat):
            row += [ufavg.nominal_value, ufavg.std_dev]
        else:
            # plain number: report it with an error of 0
            row += [ufavg, 0]
        resultable.append(row)
    resultable.sortrow()
    return resultable