コード例 #1
0
def getResult(request):
    """Django view: run the R ``DECISION`` function over POSTed listing data.

    Collects the POSTed listing fields, packs them into an R data frame,
    sources DECISION.R (located next to this module), calls its DECISION
    function, converts the result back to a pandas DataFrame, and renders
    ``result.html`` with all locals (including ``resultList``, one dict per
    result row).
    """
    # Gather all the data to be computed and convert it into a format R can read.
    ListingId = request.POST.getlist('ListingId', [])
    Title = request.POST.getlist('Title', [])
    inputAmount = request.POST['inputAmount']
    Months = request.POST.getlist('Months', [])
    CreditCode = request.POST.getlist('CreditCode', [])
    Rate = request.POST.getlist('Rate', [])
    # inputAmount is repeated so every row carries the same requested amount.
    # NOTE(review): POST values are strings handed to Int/FloatVector — rpy2
    # coercion is assumed to succeed; confirm callers send numeric values.
    data = rlc.OrdDict([('ListingId', rob.StrVector(ListingId)),
                        ('Title', rob.StrVector(Title)),
                        ('inputAmount',
                         rob.IntVector([inputAmount] * len(ListingId))),
                        ('Months', rob.IntVector(Months)),
                        ('CreditCode', rob.StrVector(CreditCode)),
                        ('Rate', rob.FloatVector(Rate))])
    inputCalDataFrame = rob.DataFrame(data)
    """导入R"""
    # Source DECISION.R from this module's directory and call DECISION.
    rFilePath = os.path.dirname(os.path.abspath(__file__)) + '/DECISION.R'
    rob.r.source(rFilePath)
    decision = rob.globalenv['DECISION'](inputCalDataFrame)
    decisionDataFrame = pandas2ri.ri2py_dataframe(
        decision)  # convert to a Python (pandas) DataFrame
    """/导入R """
    # Flatten the result rows into a list of dicts for the template context.
    inputAmount = list(decisionDataFrame['inputAmount'])[0]
    resultList = []
    for index, row in decisionDataFrame.iterrows():
        resultList.append(row.to_dict())

    return render(request, 'result.html', locals())
コード例 #2
0
ファイル: two_stage_lm.py プロジェクト: yosifovemil/thesis
    def to_dataframe(self, data):
        """Turn a list of row dicts into an R data frame.

        Each key becomes a column; column values are converted to the
        matching rpy2 vector type according to the ``formats`` registries.
        Raises when no conversion is registered for a key.
        """
        # Pivot rows (list of dicts) into columns (dict of lists).
        columns = defaultdict(list)
        for row in data:
            for field, value in row.items():
                columns[field].append(value)

        for field in columns:
            # Pick the rpy2 vector constructor for this column.
            if field in formats._INTS:
                to_vector = robjects.IntVector
            elif field in formats._FLOATS:
                to_vector = robjects.FloatVector
            elif field == formats.DATE:
                # Dates are serialised to their standard string form first.
                columns[field] = formats.strfdate_standard_list(
                    columns[field])
                to_vector = robjects.StrVector
            elif field in formats._NO_CONVERSION:
                to_vector = robjects.StrVector
            else:
                raise Exception("Conversion function not found for key %s" %
                                field)

            columns[field] = to_vector(columns[field])

        return robjects.DataFrame(columns)
コード例 #3
0
def _plt_distr(dat,
               col,
               title='',
               splitBy_pfill=True,
               pfill='label',
               independentpdf=False,
               fname='xdistr.pdf'):
    """Plot the distribution of column *col* twice: raw counts and density.

    Parameters
    ----------
    dat : pandas-like frame containing columns *col* and *pfill*.
    col : column to plot; 'distance' and 'correlation' get fixed x-limits.
    title : plot title prefix; the total pair count is appended.
    splitBy_pfill : when True, colour/fill the histograms by *pfill*.
    pfill : grouping column; rows whose value is the string 'NA' are dropped.
    independentpdf : when True, write both plots to *fname* via grDevices;
        otherwise plot to the currently active R device.
    fname : output PDF path (only used when independentpdf is True).
    """
    df = dat[dat[pfill] != 'NA']  ## remove invalid pairs
    n = len(df)
    # Re-pack only the two needed columns as an R data frame.
    df = {
        col: robjects.FloatVector(list(df[col])),
        pfill: robjects.StrVector(list(df[pfill]))
    }
    df = robjects.DataFrame(df)

    pp = ggplot2.ggplot(df) + \
        ggplot2.ggtitle('%s [Total = %s]' % (title, n))

    ## Plot1: counts
    if splitBy_pfill:
        p1 = pp + ggplot2.aes_string(x=col, fill=pfill)
    else:
        p1 = pp + ggplot2.aes_string(x=col)

    ## Plot2: density
    if splitBy_pfill:
        p2 = pp + ggplot2.aes_string(x=col, fill=pfill, y='..density..')
    else:
        p2 = pp + ggplot2.aes_string(x=col, y='..density..')
    p2 = p2 + ggplot2.geom_density(alpha=.5, origin=-500)

    # Distance plots use fixed 1kb bins and a shared x-range.
    if col == 'distance':
        p1 = p1 + \
            ggplot2.geom_histogram(binwidth=1000, alpha=.5, position='identity', origin=-500) + \
            ggplot2.xlim(-1000, 51000)

        p2 = p2 + \
            ggplot2.geom_histogram(binwidth=1000, alpha=.33, position='identity', origin=-500) + \
            ggplot2.xlim(-1000, 51000)
    else:
        p1 = p1 + \
            ggplot2.geom_histogram(alpha=.5, position='identity')

        p2 = p2 + \
            ggplot2.geom_histogram(alpha=.33, position='identity')

        # Correlations are bounded, so clamp the axis just past [-1, 1].
        if col == 'correlation':
            p1 = p1 + ggplot2.xlim(-1.1, 1.1)
            p2 = p2 + ggplot2.xlim(-1.1, 1.1)

    if independentpdf:
        grdevices = importr('grDevices')
        grdevices.pdf(file=fname)
        p1.plot()
        p2.plot()
        grdevices.dev_off()
    else:
        p1.plot()
        p2.plot()
    return
コード例 #4
0
def deseq2_basic(data_frame,
                 numerator=2,
                 denominator=1,
                 category_field='Category',
                 sample_field='Sample',
                 batch_field=None,
                 expression_name_field='Name',
                 counts_field='NumReads'):
    """Run a basic DESeq2 differential-expression analysis from a long-format
    pandas DataFrame.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Long-format table with one row per (sample, feature) count.
    numerator, denominator :
        Values of *category_field* compared against each other (mapped to
        integers through ``_trans``).
    category_field, sample_field, batch_field, expression_name_field,
    counts_field :
        Column names; *batch_field*, when given, is added to the design
        formula as an extra covariate.

    Returns
    -------
    tuple
        (raw DESeqDataSet, fitted DESeq object, results as a pandas
        DataFrame indexed by feature name, count matrix, per-sample
        metadata).
    """
    # from a dataframe
    # https://stackoverflow.com/questions/41821100/running-deseq2-through-rpy2
    design = '~ `' + category_field + '`'
    if batch_field is not None:
        design = '~ `' + batch_field + '` + `' + category_field + '`'
    #print(design)
    design = Formula(design)
    # Wide count matrix: features (rows) x samples (columns).
    mat = data_frame.pivot(columns=sample_field,
                           index=expression_name_field,
                           values=counts_field)
    mfields = [sample_field, category_field]
    if batch_field is not None: mfields += [batch_field]
    # One metadata row per sample, aligned with the matrix column order.
    meta = data_frame[mfields].groupby(sample_field).first().loc[mat.columns]
    metaarr = {}
    metaarr[category_field] = robjects.IntVector(meta[category_field].apply(
        lambda x: _trans(x, numerator, denominator)))
    if batch_field is not None:
        metaarr[batch_field] = robjects.IntVector(meta[batch_field])
    dds0 = deseq.DESeqDataSetFromMatrix(countData=mat.astype(int),
                                        colData=robjects.DataFrame(metaarr),
                                        design=design)
    dds1 = deseq.DESeq(dds0)
    # Convert the R results table back to pandas and restore the feature index.
    res = rpy2.robjects.pandas2ri.ri2py(as_df(deseq.results(dds1)))
    res.index = mat.index
    res.index.name = expression_name_field
    return (dds0, dds1, res, mat, meta)
コード例 #5
0
 def testDim(self):
     """A frame built from letters + 1:26 has 26 rows and 2 columns."""
     tagged = rlc.TaggedList(
         (robjects.r.letters, robjects.r('1:26')),
         tags=('letters', 'numbers'))
     frame = robjects.DataFrame(tagged)
     self.assertEqual(26, frame.nrow)
     self.assertEqual(2, frame.ncol)
コード例 #6
0
def test_init_from_taggedlist():
    """A DataFrame built from a TaggedList carries the 'data.frame' R class."""
    tagged = rlc.TaggedList(
        (robjects.r.letters, robjects.r('1:26')),
        tags=('letters', 'numbers'))
    frame = robjects.DataFrame(tagged)
    assert frame.rclass[0] == 'data.frame'
コード例 #7
0
    def analyse_permanova(self, user_request, otu_table, headers, sample_labels, metadata_values, strata_values, sample_ids_from_metadata):
        """Run a PERMANOVA (via the vegan R helpers) on the OTU table.

        Builds an R data frame of one FloatVector per OTU column, keeping
        only rows whose sample label appears in the metadata, then calls the
        appropriate vegan wrapper (with or without strata).

        Returns
        -------
        dict
            {"permanova": <string rendering of the R result>}.
        """
        print("Starting PERMANOVA")
        groups = robjects.FactorVector(robjects.StrVector(metadata_values))

        # Forms an OTU only table (without IDs). Idiomatic for-loops replace
        # the original manual while-counters; behaviour is unchanged.
        allOTUs = []
        for col in range(len(otu_table[0])):
            colVals = [otu_table[row][col]
                       for row in range(len(otu_table))
                       if sample_labels[row] in sample_ids_from_metadata]
            allOTUs.append((headers[col], robjects.FloatVector(colVals)))

        od = rlc.OrdDict(allOTUs)
        dataf = robjects.DataFrame(od)

        if strata_values is None:
            permanova = self.veganR.betaDiversityPERMANOVA(dataf, groups)
        else:
            strata = robjects.FactorVector(robjects.StrVector(strata_values))
            permanova = self.veganR.betaDiversityPERMANOVAWithStrata(dataf, groups, strata)
        abundancesObj = {}
        abundancesObj["permanova"] = str(permanova)

        return abundancesObj
コード例 #8
0
def plot(args):
  """ Plot data parsed from logfiles previously.

  Loads the JSON produced by the parse step, flattens it into a data frame,
  and enters an interactive menu loop for rendering and saving ggplots.

  NOTE(review): ``data.values()[0].keys()[0]`` only works on Python 2, where
  dict views are lists — confirm before porting to Python 3.
  """

  try:
    with open(args.input) as fin:
      data = json.load(fin)
  except IOError as e:
    logger.error("Could not read '{0}'".format(args.input))
    logger.debug("IOError: {0}".format(e))
    raise RuntimeError()
  except Exception as e:
    # Anything other than an I/O failure means the file is not valid JSON.
    logger.error("Invalid input file. Expected JSON")
    logger.debug("JSON Error: {0}".format(e))
    raise RuntimeError()

  # Default parser: the first parser of the first logfile entry.
  parser = data.values()[0].keys()[0] if args.p is None else args.p
  dataf = util.filter_and_flatten(data, args.f, parser, label_files=True)

  r_dataf = ro.DataFrame(dataf.asRObjects)
  gp = ggplot2.ggplot(r_dataf)

  plotargs = get_ggplot_args(args)
  # Interactive loop: render, then let the user tweak args or save.
  while True:
    render_plot(gp, plotargs)
    try:
      plotargs = print_menu(plotargs, dataf)
    except SavePlotException as e:
      try:
        ro.r("ggsave(filename='{0}')".format(e.filename))
      except IOError as io:
        logging.warn("Error saving plot: {0}".format(io))
    except StopIteration:
      # User chose to quit the menu.
      return
コード例 #9
0
    def _create_R_dataframe(self, job_ads, include_columns):
        """Converts job ads to R dataframe.

        Arguments
        ----------
        job_ads : list[:class:`JobAd`]
            List of :class:`JobAd` instances; must be non-empty.
        include_columns : list[str]
            Defines which columns are included in the dataframe.

        Returns
        ----------
        dataf : :class:`robjects.DataFrame`
            :class:`robjects.DataFrame` representing job ads.
        """
        if len(job_ads) == 0:
            raise Exception("No job ads to convert to R dataframe.")

        # Reshape from one dict per ad to one diacritic-free list per column.
        columns = {}
        for name in include_columns:
            values = [self._remove_diacritics(ad[name]) for ad in job_ads]
            if name == "relevant":
                columns[name] = IntVector(values)
            else:
                # Wrapped in I() so the strings are taken as-is.
                columns[name] = self._base.I(StrVector(values))

        return robjects.DataFrame(columns)
コード例 #10
0
def dict_list_to_df(data, base=None):
    """Converts a list of dictionaries to an rpy2 data frame.

    Parameters
    ----------
    data : list[dict]
        Non-empty list of records; the first record defines the columns.
    base : optional
        Imported R ``base`` package; imported on demand when omitted.

    Returns
    -------
    robjects.DataFrame
        One column per key, converted per the ``formats`` registries.
    """
    vectors = dict()
    keys = data[0].keys()
    if base is None:
        base = rpackages.importr('base')

    for key in keys:
        # Extract the column (comprehension replaces the manual append
        # loop), then convert None entries to R's NA.
        var = substitute_NA([entry[key] for entry in data], key)

        # Convert to the appropriate rpy2 vector type.
        if key in formats._INTS:
            vect = robjects.IntVector(var)
        elif key in formats._FLOATS:
            vect = robjects.FloatVector(var)
        elif key == formats.DATE:
            # Dates pass through a string form, then R's as.Date.
            var_str = [x.strftime(formats.DMY) for x in var]
            vect = robjects.StrVector(var_str)
            vect = base.as_Date(vect, formats.DMY)
        else:
            vect = robjects.StrVector(var)

        vectors[key] = vect

    return (robjects.DataFrame(vectors))
コード例 #11
0
def anova_shape_r_nonoptimal(model, sdata):
    """Per-phenotype ANOVA comparing full vs. null linear models fitted in R.

    For each column i of ``sdata.phenotype_array`` this fits
    ``response ~ model.fullmodel`` and ``response ~ model.nullmodel`` with
    R's ``lm``, compares them with ``anova``, and records the Pr(>F)
    p-value, its signed variant (sign of the coefficient named
    ``model.unique``), and that coefficient in a ``StatsOutput``.

    NOTE(review): uses ``xrange`` — Python 2 only.
    """
    pre_data_frame = sdata.create_r_pre_data_frame(model)
    statsout = StatsOutput(dim=sdata.phenotype_array.shape[1])
    for i in xrange(sdata.phenotype_array.shape[1]):
        # Response vector for phenotype column i.
        pre_data_frame['response'] = robjects.FloatVector(
            sdata.phenotype_array[:, i])
        dataframe = robjects.DataFrame(pre_data_frame)

        robj = robjects.r
        fit_full = robj.lm(robjects.Formula('response' + ' ~ ' +
                                            model.fullmodel),
                           data=dataframe)
        fit_reduced = robj.lm(robjects.Formula('response' + ' ~ ' +
                                               model.nullmodel),
                              data=dataframe)
        model_diff = robjects.r.anova(fit_full, fit_reduced)

        # Direction of effect = sign of the coefficient of interest.
        idx_unique = fit_full.rx2('coefficients').names.index(model.unique)
        direction = np.sign(fit_full.rx2('coefficients')[idx_unique])
        idx_pvalues = model_diff.names.index('Pr(>F)')
        # Row [1] of the anova table holds the full-vs-reduced comparison.
        statsout.pvalues[i] = model_diff[idx_pvalues][1]
        statsout.pvalues_signed[i] = direction * model_diff[idx_pvalues][1]
        statsout.tvalues[i] = fit_full.rx2('coefficients')[idx_unique]
    return statsout
コード例 #12
0
 def CSRtoDCG(sparse_matrix):
     """Wrap a SciPy CSR matrix as a sparse R matrix of the same shape."""
     # Flatten the dense form into a single column, row-major.
     flat_frame = robjects.DataFrame(sparse_matrix.toarray().flatten())
     n_rows, n_cols = sparse_matrix.shape
     return MatrixInterface.Matrix(flat_frame,
                                   nrow=n_rows,
                                   ncol=n_cols,
                                   sparse=True)
コード例 #13
0
ファイル: h5_rdata.py プロジェクト: yama1968/mldata-utils
 def write(self, data):
     """Export an mldata structure to an RData file.

     For the 'data' group each feature column becomes a data.frame column
     (nominal types converted to R factors); for the 'task' group each
     entry is bound directly into R's global environment. Everything in
     the destination environment is then saved to ``self.fname``.
     """
     group=self.get_data_group(data)
     dest=robjects.globalenv
     if group == 'data':
         datavals = data['data']
         ordering = data['ordering']
         attrlist = []
         nameind = 0
         names = data['names']
         types = data['types']
         for cur_feat in ordering:
             # 2-D features contribute one column per row of the array.
             if len(datavals[cur_feat].shape) > 1:
                 for k in range(datavals[cur_feat].shape[0]):
                     if str(types[nameind]).startswith('nominal'):
                         attrlist.append((names[nameind], robjects.FactorVector(robjects.StrVector(datavals[cur_feat][k]))))
                     else:
                         attrlist.append((names[nameind], datavals[cur_feat][k]))
                     nameind += 1
             else:
                 if str(types[nameind]).startswith('nominal'):
                     attrlist.append((names[nameind], robjects.FactorVector(robjects.StrVector(datavals[cur_feat]))))
                 else:
                     attrlist.append((names[nameind], datavals[cur_feat]))
                 nameind += 1
         dest[data['name']] = robjects.DataFrame(rlc.OrdDict(attrlist))
     elif group == 'task':
         d=data[group]
         for k in list(d.keys()):
             dest[k] = d[k]
     # Persist the whole environment into one .RData file.
     robjects.r.save(*list(robjects.r.ls(dest)), file=self.fname)
コード例 #14
0
    def pearsons(self, parm1, parm2):
        """Pearson correlation between two parameters of the current selection.

        Strongly skewed columns (|skewness| > 1) are log10-transformed
        before the correlation is computed via Hmisc::rcorr.

        Returns
        -------
        tuple
            (correlation coefficient, p-value) for the parm1/parm2 pair,
            taken from the off-diagonal entries of the rcorr matrices.
        """
        vectorDict = {}
        dataframe = self.getSelectParmData()
        names = dataframe.names

        vectorDict[parm1] = dataframe[names.index(parm1)]
        vectorDict[parm2] = dataframe[names.index(parm2)]

        # log10-transform any strongly skewed column. abs(s) > 1 replaces
        # the original `s > 1 or s < -1`, and skewness is computed once per
        # column instead of twice.
        for parm in (parm1, parm2):
            if abs(self.skewness(vectorDict[parm])) > 1:
                vectorDict[parm] = base.log10(vectorDict[parm])

        newDataframe = robjects.DataFrame(vectorDict)

        matrix = base.as_matrix(newDataframe)
        cor = hmisc.rcorr(matrix, type="pearson")

        #return stats.cor(newDataframe,use="pairwise.complete.obs",method="pearson")

        # cor[0] is the correlation matrix, cor[2] the p-value matrix; the
        # [1] entry is the parm1/parm2 off-diagonal element.
        return cor[0][1], cor[2][1]
コード例 #15
0
def makePlot(grdevices, plotName, samp_set1_vals, samp_set2_vals,
             image_file_type):
    """Jitter-plot two value sets side by side and write the figure to disk
    as PDF or PNG depending on *image_file_type*."""

    # Label every value with the set it came from.
    samp_vector = (["set1"] * len(samp_set1_vals) +
                   ["set2"] * len(samp_set2_vals))
    data_vector = samp_set1_vals + samp_set2_vals

    dframe = robjects.DataFrame({
        "sample": robjects.StrVector(samp_vector),
        "value": robjects.FloatVector(data_vector)
    })

    base_plot = ggplot2.ggplot(dframe)
    jitter = ggplot2.geom_jitter(
        position=ggplot2.position_jitter(width=0.2, height=0.01))
    pp = (base_plot +
          ggplot2.aes_string(x="sample", y='value') +
          jitter +
          ggplot2.theme_bw())

    #     ggplot2.geom_boxplot(stat="identity") +\

    if image_file_type == "pdf":
        grdevices.pdf(file=plotName)
    else:
        grdevices.png(file=plotName, width=512, height=512)
    pp.plot()
    grdevices.dev_off()
コード例 #16
0
ファイル: loc_graph.py プロジェクト: pascalj/dash-apps
def bargraph_language(results):
    """For each known language, draw a grouped bar chart of lines-of-code
    per problem/variation and write it to 'bargraph-loc-lang-<lang>.pdf'.

    Parameters
    ----------
    results : dict
        Maps (language, problem, variation) -> lines of code.
    """
    r = robjects.r

    for language in languages:
        varis = []
        probs = []
        locs = []
        # Collect the LOC entries belonging to this language.
        # Iterating items() avoids the per-key dict lookup of the original.
        for (lang, prob, var), loc in results.items():
            if lang == language:
                varis.append(pretty_varis[var])
                probs.append(prob)
                locs.append(loc)
        r.pdf('bargraph-loc-lang-' + language + '.pdf',
              height=pdf_height(),
              width=pdf_width())
        df = robjects.DataFrame({
            'Variation': StrVector(varis),
            'Problem': StrVector(probs),
            'Lines': IntVector(locs),
        })

        #print (df)
        gp = ggplot2.ggplot(df)

        pp = gp + \
            ggplot2.aes_string (x='Problem', y='Lines', fill='Variation') + \
            ggplot2.geom_bar (position='dodge', stat='identity') + \
            ggplot2_options () + \
            ggplot2_colors () + \
            robjects.r('scale_x_discrete(limits=c("randmat", "thresh", "winnow", "outer", "product", "chain"))') +\
            robjects.r('ylab("Lines of Code")')
        pp.plot()
        r['dev.off']()
コード例 #17
0
def make_output(tss_cov, out_prefix, upstream, downstream):
    """Write raw TSS coverage to '<prefix>_raw.txt' and plot full + zoomed
    coverage curves to '<prefix>_full.pdf' and '<prefix>_zoom.pdf'.

    NOTE(review): uses the Python 2 ``print >>`` statement — this module is
    Python 2 only.
    """
    # dump raw counts to file
    raw_out = open('%s_raw.txt' % out_prefix,'w')
    for i in range(-upstream,downstream+1):
        print >> raw_out, '%d\t%e' % (i, tss_cov[upstream+i])
    raw_out.close()

    # make plot data structures (position index vs. coverage)
    tss_i = ro.IntVector(range(-upstream,downstream+1))
    cov = ro.FloatVector(tss_cov)
    df = ro.DataFrame({'tss_i':tss_i, 'cov':cov})

    # construct full plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index') + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file
    grdevices.pdf(file='%s_full.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()

    # construct zoomed plot (x restricted to +/- 1kb around the TSS)
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov') + \
        ggplot2.geom_point() + \
        ggplot2.scale_x_continuous('TSS index',limits=ro.IntVector([-1000,1000])) + \
        ggplot2.scale_y_continuous('Coverage')

    # plot to file
    grdevices.pdf(file='%s_zoom.pdf' % out_prefix)
    gp.plot()
    grdevices.dev_off()
コード例 #18
0
    def testNewFromTaggedList(self):
        """A DataFrame built from a TaggedList has R class 'data.frame'."""
        tagged = rlc.TaggedList(
            (robjects.r.letters, robjects.r('1:26')),
            tags=('letters', 'numbers'))
        frame = robjects.DataFrame(tagged)
        self.assertEqual("data.frame", frame.rclass[0])
コード例 #19
0
ファイル: test_rFunctions.py プロジェクト: npklein/pyMSA
    def test_index(self):
        """rFunctions.index must locate a named column both in a data frame
        and in a matrix carrying column names."""
        expectedIndex = 1

        # testing dataframe
        # (renamed from `dict`, which shadowed the builtin)
        vectors = {
            'test1': R.IntVector((12, 12, 15)),
            'test2': R.IntVector((32, 4, 12)),
            'test3': R.IntVector((3, 12, 26))
        }  # note that test1 has 12 in row 1 and row 2
        testFrame = R.DataFrame(vectors)
        # because the dict from which the dataframe is made is not ordered,
        # the testframe is ordered first to make sure that the column 'test2'
        # is always at the same position.
        # However, doing this uses the index function under test, which might
        # or might not be a right thing to do.
        testFrame = testFrame.rx[True, R.r['with'](
            testFrame,
            R.r['order'](R.IntVector([
                rFunctions.index(testFrame, 'test1'),
                rFunctions.index(testFrame, 'test2'),
                rFunctions.index(testFrame, 'test3')
            ])),
        )]

        actualDataFrameIndex = rFunctions.index(testFrame, 'test2')
        # testing matrix (same values as the dataframe)
        testMatrix = R.r.matrix(R.IntVector([12, 12, 15, 32, 4, 12, 3, 12,
                                             26]),
                                nrow=3)
        testMatrix.colnames = R.StrVector(['test1', 'test2', 'test3'])
        actualMatrixIndex = rFunctions.index(testMatrix, 'test2')

        self.assertEqual(expectedIndex, actualDataFrameIndex)
        self.assertEqual(expectedIndex, actualMatrixIndex)
コード例 #20
0
def getdata():
    """Flask endpoint: return OHLC bars plus cci/sma series as JSON.

    NOTE(review): the posted 'date' is immediately overwritten with the
    hard-coded value '2015' — looks like leftover debug code; confirm
    whether the request value should be honoured before shipping.
    """
    date = request.form['date']
    date = '2015'
    print('ok')
    print(date)

    datar = data[date]
    datar = robjects.DataFrame(datar)

    # Prepare/resample via the R 'getdata' function at 5-period frequency.
    freq = 5
    datar = robjects.r['getdata'](datar, freq)
    datar = pandas2ri.ri2py(datar)

    dates = datar['Date']
    dates = dates.tolist()
    # OHLC values as a list of [Open, Close, Low, High] rows.
    bar = datar[['Open', 'Close', 'Low', 'High']]
    bar = np.array(bar)
    bar = bar.tolist()
    cci = datar['cci']
    cci = cci.tolist()
    cci = [round(c, 2) for c in cci]
    sma = datar['sma']
    sma = sma.tolist()

    option = {'bar': bar, 'sma': sma, 'cci': cci, 'dates': dates}
    return jsonify(option)
コード例 #21
0
 def Python_df_to_R_df(self,Python_df):
     """Convert a pandas DataFrame into an R data frame via pandas2ri.

     Parameters
     ----------
     Python_df : pandas.DataFrame
         Frame to convert.

     Returns
     -------
     R data frame produced by rpy2's pandas converter.
     """
     pandas2ri.activate()
     # The converter produces the frame directly; the original pre-seeded
     # an empty DataFrame here that was immediately overwritten (dead store).
     with localconverter(robjects.default_converter + pandas2ri.converter):
         r_from_pd_df = robjects.conversion.py2rpy(Python_df)
     return r_from_pd_df
コード例 #22
0
def test_dim():
    """The letters/numbers frame has 26 rows and 2 columns."""
    tagged = rlc.TaggedList(
        (robjects.r.letters, robjects.r('1:26')),
        tags=('letters', 'numbers'))
    frame = robjects.DataFrame(tagged)
    assert frame.nrow == 26
    assert frame.ncol == 2
コード例 #23
0
ファイル: test.py プロジェクト: genericity/test-analysis
    def get_response_matrix(self):
        """Build the 0/1 response matrix consumed by the R `ltm` package.

        Returns an :class:`robjects.DataFrame` with one IntVector column per
        retained question (keys 1..k) and one entry per student.
        """
        matrix = {}

        matrix_index = 0
        # For each question:
        for question_index in range(self.test_length):
            question = self.questions[question_index]

            # Cannot have questions where either 100% or 0% were correct, as
            # ltm will crash. This also excludes questions the user has opted
            # to discard.
            if not question.discard:
                # One 0/1 entry per student for this question.
                question_response_vector = [
                    int(self.students[j].is_right(question_index))
                    for j in range(len(self.students))
                ]

                matrix_index += 1
                # BUG FIX: this assignment used to sit outside the
                # `not question.discard` branch, so a discarded question
                # redundantly rebuilt the previous column — and crashed with
                # a NameError when the very first question was discarded.
                matrix[matrix_index] = robjects.IntVector(
                    question_response_vector)

        # Convert the dictionary of vectors to a dataframe.
        response_matrix = robjects.DataFrame(matrix)

        return response_matrix
コード例 #24
0
 def calc_size_factors(self):
     """Estimate DESeq size factors for the rounded integer count table."""
     # DESeq expects integer counts: round first, then truncate to int.
     self._count_df = np.round(self._count_df, decimals=0).astype(int)
     r_counts = robjects.DataFrame(self._count_df)
     # Strip column names before handing the frame to R.
     r_counts.colnames = robjects.rinterface.NULL
     size_factors = r.estimateSizeFactorsForMatrix(r_counts)
     return pd.Series(size_factors, index=self._count_df.columns)
コード例 #25
0
def convert_dataframe_columns(df, strings_as_factors=False):
    """
    Essentially the same as pandas.spy.common.convert_to_r_dataframe
    except we don't convert the index into strings


    We are just grabbing the column data here
    """

    import rpy2.rlike.container as rlc

    columns = rlc.OrdDict()

    #FIXME: This doesn't handle MultiIndex

    for name in df:
        series = df[name]
        dtype = series.dtype.type
        # Swap missing entries for the matching R NA sentinel.
        cells = [
            cell if pd.notnull(cell) else NA_TYPES[dtype]
            for cell in series
        ]

        vector = VECTOR_TYPES[dtype](cells)

        if not strings_as_factors:
            # Wrap in I() so R takes the values as-is.
            as_is = robjects.baseenv.get("I")
            vector = as_is(vector)

        columns[name] = vector

    return robjects.DataFrame(columns)
コード例 #26
0
ファイル: sz_plotting.py プロジェクト: svm-zhang/poolseq_tk
def make_manhattan(grdevices,
                   data,
                   raw_pvals_vector,
                   snps_to_highlight,
                   padj_cutoff,
                   out_manhattan,
                   title="",
                   xlable="",
                   xlim="-"):
    """Draw a Manhattan plot of raw p-values with qqman, written to PDF.

    Parameters
    ----------
    grdevices : imported R grDevices package (opens/closes the PDF device).
    data : dict keyed by (chrom, pos) -> chromosome code, read in sorted
        key order. NOTE(review): uses ``iterkeys`` — Python 2 only.
    raw_pvals_vector : raw p-values aligned with the sorted SNP order.
    snps_to_highlight : SNP names passed to qqman's ``highlight``.
    padj_cutoff : adjusted-p cutoff; the genome-wide line is -log10 of it.
    out_manhattan : output PDF path.
    xlim : "min,max" to restrict the x-axis, or "-" for the default plot
        (which instead fixes ylim and chromosome labels).
    """
    snp_names = []
    snp_pos = []
    chr_names = []
    for chr, pos in sorted(data.iterkeys()):
        snp_pos.append(pos)
        chr_names.append(data[chr, pos])
        snp_names.append("%s_%d" % (chr, pos))
    # Assemble the SNP/CHR/BP/P columns that qqman::manhattan expects.
    od_raw = rlc.OrdDict([("SNP", robjects.StrVector(snp_names)),
                          ("CHR", robjects.IntVector(chr_names)),
                          ("BP", robjects.IntVector(snp_pos)),
                          ("P", robjects.FloatVector(raw_pvals_vector))])

    color_vector = robjects.StrVector(["blue4", "orange3"])
    sig_snps = robjects.StrVector(snps_to_highlight)
    qqman = rpackages.importr('qqman')
    grdevices.pdf(out_manhattan)
    if xlim != "-":
        xmin = int(xlim.split(",")[0])
        xmax = int(xlim.split(",")[1])
        qqman.manhattan(robjects.DataFrame(od_raw),
                        highlight=sig_snps,
                        col=color_vector,
                        suggestiveline=False,
                        genomewideline=-1 * math.log10(padj_cutoff),
                        xlim=robjects.IntVector([xmin, xmax]),
                        xlab=xlable,
                        main=title)
    else:
        qqman.manhattan(robjects.DataFrame(od_raw),
                        highlight=sig_snps,
                        col=color_vector,
                        suggestiveline=False,
                        genomewideline=-1 * math.log10(padj_cutoff),
                        main=title,
                        ylim=robjects.IntVector([0, 10]),
                        chrlabs=robjects.StrVector(
                            ["2L", "2R", "3L", "3R", "X"]))
    grdevices.dev_off()
コード例 #27
0
    def run(self, data, regression, resources=None):
        """
        The method prints out summary of the BMA procedure and creates an imageplot.
        If resources has an entry 'bma_imageplot_filename', the imageplot is sent to this file as pdf.
        The method does not return any useful results - it is a tool for variable selection.
        Once you selected your variables, use estimate_linear_regression for further usage of the coefficients.

        Expects an entry 'outcome' in resources that provides the values of the dependent variable.
        'data' is a 2D numpy array of the actual data (nobservations x ncoefficients),
            it can be created by Dataset.create_regression_data_for_estimation(...).
        'regression' is an instance of a regression class.

        NOTE(review): Python 2 only (``raise StandardError, "..."`` syntax);
        the trailing bare ``except`` hides any error raised by the R calls.
        """
        r = robjects.r
        if data.ndim < 2:
            raise StandardError, "Argument 'data' must be a 2D numpy array."

        nobs = data.shape[0]
        nvar = data.shape[1]
        constant_position = resources.get(
            "constant_position", array([],
                                       dtype='int32'))  #position for intercept

        if constant_position.size == 0:  #position for intercept
            constant_position = -1
            nvalues = nvar
        else:
            constant_position = constant_position[0]
            nvalues = nvar + 1

        beta = zeros(nvalues).astype(float32)

        # One R column per coefficient, named after the coefficient.
        coef_names = resources.get("coefficient_names", nvar * [])
        data_for_r = {}
        for icoef in range(len(coef_names)):
            data_for_r[coef_names[icoef]] = data[:, icoef]
        bma = importr("BMA")
        d = robjects.DataFrame(data_for_r)
        try:
            bma_params = {
                'x': d,
                'y': resources["outcome"],
                'glm.family': "gaussian",
                'strict': 1
            }
            #fit = bma.bic_glm(x=d, y=resources["outcome"], glm_family="gaussian", strict=1)
            fit = bma.bic_glm(**bma_params)
            fit[20] = ''  # to have less output in the summary
            r.summary(fit)
            # Image plot goes to PDF when a filename is supplied, else to X11.
            filename = resources.get('bma_imageplot_filename', None)
            if filename is not None:
                r.pdf(file=filename)
                bma.imageplot_bma(fit)
                r['dev.off']()
            else:
                r.X11()
                bma.imageplot_bma(fit)
        except:
            logger.log_warning("Error in BMA procedure.")
        return {}
コード例 #28
0
def make_output_and(te_tss_cov, control_te_tss_cov, out_prefix, upstream,
                    downstream):
    """For a whitelist of TE families, dump raw main/control TSS coverage to
    '<prefix>_raw/' and plot both curves per family to '<prefix>_plot/'.

    NOTE(review): Python 2 only — uses the ``print >>`` statement and
    ``2 * range(...)`` list repetition.
    """
    # clean raw counts dir
    if os.path.isdir('%s_raw' % out_prefix):
        shutil.rmtree('%s_raw' % out_prefix)
    os.mkdir('%s_raw' % out_prefix)

    # dump raw counts to file (whitelisted repeat names/families only)
    for te in te_tss_cov:
        if te[0] in [
                'n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb', 'LTR7'
        ] and te[1] in [
                'n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
                'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
                'LTR/ERVK', 'DNA/TcMar-Tigger'
        ]:
            raw_out = open(
                '%s_raw/%s_%s.txt' %
                (out_prefix, te[0].replace('/', '_'), te[1].replace('/', '_')),
                'w')
            for i in range(-upstream, downstream + 1):
                print >> raw_out, '%d\t%e\t%e' % (i, te_tss_cov[te][
                    upstream + i], control_te_tss_cov[te][upstream + i])
            raw_out.close()

    # clean plot dirs
    if os.path.isdir('%s_plot' % out_prefix):
        shutil.rmtree('%s_plot' % out_prefix)
    os.mkdir('%s_plot' % out_prefix)

    # make data structures: index vector repeated for Main + Control labels
    tss_i = ro.IntVector(2 * range(-upstream, downstream + 1))
    labels = ro.StrVector(['Main'] * (upstream + downstream + 1) +
                          ['Control'] * (upstream + downstream + 1))
    for te in te_tss_cov:
        if te[0] in [
                'n', '*', 'HERVH-int', 'L2a', 'AluSx', 'AluJb', 'MIRb', 'LTR7'
        ] and te[1] in [
                'n', '*', 'LINE/L1', 'SINE/Alu', 'LTR/ERV1', 'LTR/ERVL-MaLR',
                'LINE/L2', 'LTR/ERVL', 'SINE/MIR', 'DNA/hAT-Charlie',
                'LTR/ERVK', 'DNA/TcMar-Tigger'
        ]:
            # Main coverage followed by control coverage, matching `labels`.
            cov = ro.FloatVector(te_tss_cov[te] + control_te_tss_cov[te])
            df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

            # construct full plot
            gp = ggplot2.ggplot(df) + \
                ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
                ggplot2.geom_point() + \
                ggplot2.scale_x_continuous('TSS index') + \
                ggplot2.scale_y_continuous('Coverage') + \
                ggplot2.scale_colour_discrete('')

            # plot to file
            grdevices.pdf(
                file='%s_plot/%s_%s.pdf' %
                (out_prefix, te[0].replace('/', '_'), te[1].replace('/', '_')))
            gp.plot()
            grdevices.dev_off()
def ortholog_blast(
    head_id,
    sequence_file,
    blast_out_value_tsv_file,
    read_function,
    write_function,
    blast_db_path,
    blast_out_xml_path,
    blast_out_asn_path,
    blast_out_txt_path,
    rbase
    ):
    """All-vs-all BLAST for one head-sequence set, returning an R data frame
    with columns gene_name / blast_value / MGG_head (per-hit identity).

    Results are cached as a TSV: when *blast_out_value_tsv_file* already
    exists it is read back via *read_function* instead of re-running BLAST.
    """
    # TODO: the read step needs revision (translated from the original note).
    if blast_out_value_tsv_file.exists() is True:
        return(read_function(
            str(blast_out_value_tsv_file),
            header = True,
            sep = "\t",
            **{'stringsAsFactors': False},
            **{'check.names': False}
        ))
    blast_db_file=blast_db_path/(head_id+"_blast_head_db")
    blast_out_xml_file=blast_out_xml_path/(head_id+"_blast_out.xml")
    blast_out_asn_file=blast_out_asn_path/(head_id+"_blast_out.asn")
    blast_out_txt_file=blast_out_txt_path/(head_id+"_blast_out.txt")
    # Run BLAST only when no prior XML output exists.
    if blast_out_xml_file.exists() is False:
        blastdb(sequence_file,blast_db_file)
        blast(blast_db_file,sequence_file,blast_out_xml_file,blast_out_asn_file,blast_out_txt_file)
    
    with open(blast_out_xml_file) as xml_fl:
        R_blast_vlaue_list=rbase.list()
        i=1
        for record in NCBIXML.parse(xml_fl):
            gene_name_list=[]
            blast_value_list=[]
            identity_perscent=0
            gene_name=record.query.split()[0]
            # Only MGG-prefixed queries are processed; parsing stops at the
            # first non-MGG record.
            if gene_name[0:3]!="MGG": break
            if record.alignments:
                if len(record.alignments)>1:
                    for alignment in record.alignments:
                        max_flag=-1
                        # Skip self-hits.
                        if alignment.hit_def==gene_name:continue
                        for hsp in alignment.hsps:
                            # NOTE(review): max_flag is reset per alignment,
                            # so this break fires after the first HSP — only
                            # the top HSP's identity is kept per alignment.
                            # Confirm this is the intended behaviour.
                            if max_flag > -1: break
                            identity_perscent=hsp.identities/hsp.align_length
                            max_flag=max_flag+20
                            blast_value_list.append(identity_perscent)
                        gene_name_list.append(alignment.hit_def)
            MGG_head_id=[gene_name]*len(gene_name_list)
            # One R data frame per query, collected into an R list.
            R_blast_vlaue_list.rx2[i]=robjects.DataFrame({
                "gene_name":robjects.StrVector(gene_name_list),
                "blast_value":robjects.FloatVector(blast_value_list),
                "MGG_head":robjects.StrVector(MGG_head_id)
            })
            i=i+1
    # rbind all per-query frames, then persist the cache TSV.
    R_blast_vlaue_df=rbase.do_call("rbind",R_blast_vlaue_list)
    write_function(R_blast_vlaue_df,**{'file': str(blast_out_value_tsv_file)},**{'append': False},**{'quote': False},**{'sep': "\t"},**{'row.names': False},**{'col.names': True})
    return R_blast_vlaue_df
コード例 #30
0
 def preprocess_df_R(self,dataframe):
     """Convert a pandas DataFrame of numeric columns to an R data frame.

     Every column is coerced to an R FloatVector, so non-numeric columns
     will raise inside rpy2 — callers must pass numeric data only.
     """
     # Dict comprehension replaces the original manual build loop.
     feature_dict = {
         colname: robjects.FloatVector(dataframe[colname])
         for colname in dataframe.columns
     }
     return robjects.DataFrame(feature_dict)