예제 #1
0
def fusegroupcolumns(tabdata, group):
    """
    Append one extra column that holds the concatenation of the
    columns listed in ``group``.

    A plain list is treated as a header row followed by data rows and
    is wrapped into a table first.  The name of the new column is the
    concatenation of the group column names.  Having a single fused
    column allows grouping on it later in ggplot2.

    Returns a tuple ``(new table, name of the fused column)``.
    """
    if isinstance(tabdata, list):
        wrapped = fhutils.Table()
        wrapped.columnames = tabdata[0]
        wrapped.data = tabdata[1:]
        tabdata = wrapped.iclone()

    header = list(tabdata.columnames)
    fusedname = ''.join(str(part) for part in group)
    header.append(fusedname)

    fused = fhutils.Table()
    fused.columnames = header
    for record in tabdata.iterrows():
        fusedvalue = ''.join(str(record[name]) for name in group)
        cells = [record[name] for name in tabdata.columnames]
        cells.append(fusedvalue)
        fused.data.append(cells)
    return fused, fusedname
예제 #2
0
def pyobj2dataframe(valuelis, columnames=True, rownames=False):
    """
    Build an rpy2 R data frame from a table or a list of rows.

    An object that already is an rpy2 DataFrame is returned untouched.
    For list input, ``columnames`` tells whether the first row holds the
    column names; if not, names ``v0``, ``v1``, ... are generated.
    With ``rownames`` set, the frame is passed through R's
    ``data.frame`` with ``row_names=1``.

    Each column becomes a FloatVector when every converted cell is an
    int or float, and a StrVector otherwise.
    """
    if isinstance(valuelis, robjects.vectors.DataFrame):
        return valuelis

    if isinstance(valuelis, fhutils.Table):
        pytab = fhutils.Table('data')
        assert '-' not in valuelis.columnames, valuelis.columnames
        pytab.columnames = valuelis.columnames
        pytab.data = valuelis.data
    else:
        pytab = fhutils.Table('data')
        if columnames:
            # first row carries the column names
            assert '-' not in valuelis[0], valuelis[0]
            pytab.columnames = valuelis[0]
            pytab.data = valuelis[1:]
        else:
            # no header supplied: invent generic names
            pytab.columnames = ['v' + str(idx) for idx in range(len(valuelis[0]))]
            pytab.data = valuelis

    columns = []
    for name in pytab.columnames:
        converted = []
        for cell in pytab.getcolumn(name):
            if isinstance(cell, (list, tuple)):
                # only the first entry of a nested cell is used
                cell = cell[0]
            if isinstance(cell, (int, float)) or cell.isdigit():
                converted.append(float(cell))
            elif cell == 'NA':
                converted.append(rinterface.NARealType())
            else:
                # let the helper pick the most likely type
                converted.append(fhutils.is_numeric(cell))

        allnumeric = all(isinstance(val, (float, int)) for val in converted)
        if allnumeric:
            vector = robjects.vectors.FloatVector(converted)
        else:
            vector = robjects.vectors.StrVector(converted)
        columns.append((name, vector))

    dataframe = robjects.DataFrame(rlc.OrdDict(columns))
    if rownames:
        makeframe = robjects.r('data.frame')
        dataframe = makeframe(dataframe, row_names=1)
    return dataframe
예제 #3
0
def dataframe2pyobj(dataframe):
    """
    Convert an rpy2 data frame into a table.

    Factor columns are replaced by their level labels.  The row names
    of the data frame are stored in a leading ``rownames`` column.
    """
    table = fhutils.Table()

    header = list(dataframe.colnames)
    rowlabels = list(dataframe.rownames)

    valuesbycolumn = {}
    for name, vector in dataframe.items():
        if isinstance(vector, robjects.vectors.FactorVector):
            levels = tuple(vector.levels)
            # each factor code, shifted down by one, indexes into the levels
            values = [levels[code - 1] for code in tuple(vector)]
        else:
            values = tuple(vector)
        valuesbycolumn[name] = values
    valuesbycolumn['rownames'] = rowlabels

    table.columnames = ['rownames'] + header
    # collect the data column-wise, then flip it into rows
    for name in table.columnames:
        table.data.append(tuple(valuesbycolumn[name]))
    table.data = fhutils.pytranspose(table.data)
    return table
예제 #4
0
def pyrnparcomp(table, measurevar, groupvar):
    """
    Run the R routine ``rnparcomp`` on ``table``.

    ``table`` is converted to an R data frame, handed to the R function
    together with ``measurevar`` and ``groupvar``, and the result is
    converted back into a table whose rows are printed.

    Returns a table named 'nparcomp' with the columns
    'group1', 'group2' and 'p-value'.
    """
    rconsole = rpystatinit()
    rnparcomp = rconsole("rnparcomp")
    dataframe = pyobj2dataframe(table)
    # Call the handle fetched above; the previous code called ``rmultcomp``,
    # a name that is never defined inside this function.
    results = rnparcomp(data=dataframe, measurevar=measurevar, groupvar=groupvar)
    results = dataframe2pyobj(results)
    newtab = fhutils.Table('nparcomp')
    newtab.columnames = ['group1', 'group2', 'p-value']
    # NOTE(review): the result rows are only printed; newtab is returned
    # with its header set but without any rows appended here.
    for row in results:
        print(row)
    return newtab
예제 #5
0
def pyrmultcomp(table, measurevar, groupvar, contrastsvector='none'):
    """
    Run the R routine ``rmultcomp`` on ``table``.

    ``contrastsvector`` is either an iterable of ``(group1, group2)``
    pairs, each turned into the contrast string ``"group1 - group2 = 0"``,
    or a plain string (such as the default ``'none'``) which is forwarded
    unchanged as a one-element character vector.

    Returns a table named 'multcomp' with the columns
    'group1', 'group2' and 'p-value'.
    """
    rconsole = rpystatinit()
    rmultcomp = rconsole("rmultcomp")
    dataframe = pyobj2dataframe(table)

    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3.
    if isinstance(contrastsvector, (str, type(u''))):
        # A string must not be iterated character by character (that made
        # the default value fail on unpacking).
        # NOTE(review): assumes the R side accepts the string as-is, e.g.
        # 'none' meaning "no explicit contrasts" - confirm against the R code.
        contrasts = (contrastsvector,)
    else:
        # Use the unpacked pair; the previous code formatted with an
        # undefined name ``tu`` and raised NameError.
        contrasts = tuple(
            "{tu1} - {tu2} = 0".format(tu1=tu1, tu2=tu2)
            for tu1, tu2 in contrastsvector
        )
    contrastsvector = robjects.vectors.StrVector(contrasts)

    results = rmultcomp(data=dataframe, measurevar=measurevar,
                        groupvar=groupvar, contrastsvector=contrastsvector)
    results = dataframe2pyobj(results)
    newtab = fhutils.Table('multcomp')
    newtab.columnames = ['group1', 'group2', 'p-value']
    # NOTE(review): the result rows are only printed; newtab is returned
    # with its header set but without any rows appended here.
    for row in results:
        print(row)
    return newtab
예제 #6
0
def pykruskalwallis(table, formula, term, filename='kruskalimage', gformat='pdf'):
    """
    Run the R routine ``rkruskal`` on ``table`` and return the result
    in long form.

    ``formula`` is passed to R as ``formulastring`` together with
    ``term``.  Every cell of the converted result, except the first
    column, becomes one row ``(first cell of the row, column name, cell)``
    in the returned table, whose columns are 'group1', 'group2' and
    'p-value'.

    ``filename`` and ``gformat`` are accepted but not used in this function.
    """
    rconsole = rpystatinit()
    rkruskal = rconsole("rkruskal")

    rframe = pyobj2dataframe(table)
    results = dataframe2pyobj(
        rkruskal(data=rframe, formulastring=formula, term=term)
    )

    newtab = fhutils.Table('kruskal')
    newtab.columnames = ['group1', 'group2', 'p-value']
    for row in results:
        # skip column 0: it supplies the first group label of every triple
        for idx, othergroup in enumerate(results.columnames[1:], 1):
            newtab.append((row[0], othergroup, row[idx]))

    return newtab
예제 #7
0
def propagatetable(datatable, groupcolumn, measurecolumn, errorcolumn):
    """
    Primitive aggregate (averaging) function for a table with error
    values; errors are propagated through the ufloat mechanism.

    Rows are grouped by the value(s) in ``groupcolumn`` (a single column
    name or a sequence of names).  Rows whose measure is 'NA' are
    skipped; rows whose error is 'NA' contribute the bare measure value.
    NA filtering is always on - there is no switch for it.

    Returns a sorted table named 'propagate' with the group columns
    followed by ``measurecolumn`` and ``errorcolumn``; the error is 0
    when the average is not a UFloat.
    """
    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3, so a
    # single check covers both without naming the Python-2-only builtin
    # (which raised NameError on Python 3).
    if isinstance(groupcolumn, (str, type(u''))):
        groupcolumn = (groupcolumn,)

    keyvalue = []
    for row in datatable.iterrows():
        key = tuple([row[col] for col in groupcolumn])
        if row[measurecolumn] in ('NA',):
            continue
        elif row[errorcolumn] in ('NA',):
            # no usable error: keep the plain measure value
            value = row[measurecolumn]
        else:
            value = ufloat(row[measurecolumn], row[errorcolumn])
        keyvalue.append((key, value))

    keyvalue = fhutils.keylis2dic(keyvalue)

    resultable = fhutils.Table('propagate')
    resultable.columnames = list(groupcolumn) + [measurecolumn, errorcolumn]

    for k, valuelist in keyvalue.items():
        valuelist = [ele for ele in valuelist if ele != 'NA']  # NA filtered out
        ufavg = ufmeanstd(valuelist, asufloat=True)
        row = list(k)
        if isinstance(ufavg, uncertainties.UFloat):
            row += [ufavg.nominal_value, ufavg.std_dev]
        else:
            row += [ufavg, 0]

        resultable.append(row)
    resultable.sortrow()

    return resultable