Example #1
0
 def test_parse_dates_list(self):
     # Round-trip a single datetime column through to_html()/read_html(),
     # naming the column to parse first by position and then by label.
     frame = DataFrame({"date": date_range("1/1/2001", periods=10)})
     html = frame.to_html()
     for date_cols in ([1], ["date"]):
         tables = self.read_html(html, parse_dates=date_cols, index_col=0)
         tm.assert_frame_equal(frame, tables[0])
def pandas_open_in_browser(df):
    """Render *df* as HTML and open it with the program named by ``$BROWSER``.

    The UTF-8 encoded output of ``df.to_html()`` is written to a temporary
    ``.html`` file, the shell command ``$BROWSER '<path>'`` is run, and the
    file is removed once that command returns.

    NOTE(review): if the browser command returns before the page has been
    read (e.g. it hands the path to an already running instance), the file
    may be gone by the time it is loaded -- confirm this is acceptable.
    """
    import tempfile

    # Render first so that a failing to_html() does not leave a stray file.
    html = df.to_html().encode("utf-8")
    htmlfile = tempfile.NamedTemporaryFile(delete=False, suffix=".html")
    try:
        try:
            htmlfile.write(html)
        finally:
            # Close before launching so the content is flushed to disk.
            htmlfile.close()
        os.system("$BROWSER '{}'".format(htmlfile.name))
    finally:
        # delete=False means cleanup is our job, whatever happened above.
        os.unlink(htmlfile.name)
Example #3
0
    def test_to_html_multiindex_sparsify(self):
        index = pd.MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], names=["foo", None])

        df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index)

        result = df.to_html()
        expected = """<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th></th>
      <th>0</th>
      <th>1</th>
    </tr>
    <tr>
      <th>foo</th>
      <th></th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td rowspan="2" valign="top"><strong>0</strong></td>
      <td><strong>0</strong></td>
      <td> 0</td>
      <td> 1</td>
    </tr>
    <tr>
      <td><strong>1</strong></td>
      <td> 2</td>
      <td> 3</td>
    </tr>
    <tr>
      <td rowspan="2" valign="top"><strong>1</strong></td>
      <td><strong>0</strong></td>
      <td> 4</td>
      <td> 5</td>
    </tr>
    <tr>
      <td><strong>1</strong></td>
      <td> 6</td>
      <td> 7</td>
    </tr>
  </tbody>
</table>"""
        self.assertEquals(result, expected)
Example #4
0
    def test_to_html_index_formatter(self):
        df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], columns=["foo", None], index=range(4))

        f = lambda x: "abcd"[x]
        result = df.to_html(formatters={"__index__": f})
        expected = """\
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>foo</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>a</th>
      <td> 0</td>
      <td> 1</td>
    </tr>
    <tr>
      <th>b</th>
      <td> 2</td>
      <td> 3</td>
    </tr>
    <tr>
      <th>c</th>
      <td> 4</td>
      <td> 5</td>
    </tr>
    <tr>
      <th>d</th>
      <td> 6</td>
      <td> 7</td>
    </tr>
  </tbody>
</table>"""
        self.assertEquals(result, expected)
Example #5
0
 def test_to_html_with_no_bold(self):
     x = DataFrame({"x": randn(5)})
     ashtml = x.to_html(bold_rows=False)
     assert "<strong>" not in ashtml
Example #6
0
    def test_to_html(self):
        # Smoke test for DataFrame.to_html(): writing to a buffer must return
        # None and produce the same text as the string-returning form, and the
        # common keyword combinations must not raise.

        # big mixed
        biggie = DataFrame({"A": randn(200), "B": tm.makeStringIndex(200)}, index=range(200))

        # blank out the first 20 rows so missing values get rendered too
        biggie["A"][:20] = nan
        biggie["B"][:20] = nan
        s = biggie.to_html()

        buf = StringIO()
        retval = biggie.to_html(buf=buf)
        # assertTrue replaces the deprecated assert_ alias
        self.assertTrue(retval is None)
        self.assertEqual(buf.getvalue(), s)

        self.assertTrue(isinstance(s, basestring))

        biggie.to_html(columns=["B", "A"], col_space=17)
        biggie.to_html(columns=["B", "A"], formatters={"A": lambda x: "%.1f" % x})

        biggie.to_html(columns=["B", "A"], float_format=str)
        biggie.to_html(columns=["B", "A"], col_space=12, float_format=str)

        # a frame with an index but no columns must render as well
        frame = DataFrame(index=np.arange(200))
        frame.to_html()
Example #7
0
 def test_parse_dates_combine(self):
     # Split each timestamp into separate "date" and "time" string columns,
     # render to HTML, and check that read_html() can merge columns 1 and 2
     # back into a single "datetime" column.
     stamps = Series(date_range("1/1/2001", periods=10))
     split = DataFrame(
         {
             "date": stamps.map(lambda ts: str(ts.date())),
             "time": stamps.map(lambda ts: str(ts.time())),
         }
     )
     html = split.to_html()
     tables = self.read_html(html, parse_dates={"datetime": [1, 2]}, index_col=1)
     expected = DataFrame({"datetime": stamps})
     tm.assert_frame_equal(expected, tables[0])
Example #8
0
 def test_to_html_unicode(self):
     # it works!
     df = DataFrame({u"\u03c3": np.arange(10.0)})
     df.to_html()
     df = DataFrame({"A": [u"\u03c3"]})
     df.to_html()
Example #9
0
def test_convert():
    """Check that ``convert`` recovers a random float frame from its HTML rendering.

    The comparison uses a relative tolerance of 1e-4.
    """
    frame = DataFrame(np.random.rand(4, 5), columns=list("abcde"))
    recovered = convert(frame.to_html(), float)
    assert np.allclose(recovered, frame, rtol=1e-4)
Example #10
0
# CGI script body: show a season/game form, load the matching play-by-play
# JSON file and print its three description columns as an HTML table.
#
# print is written as ``print(<single expression>)`` throughout; with exactly
# one argument that form produces the same output as the Python 2 statement.

# Hard-coded sample URL; ``query`` below is parsed from it and is not used by
# the rest of this script (debugging leftover).
testurl = "http://neurocoding.info/cgibin/testquery.py?s=00214&g=30&z=3"
form = cgi.FieldStorage()
print("""
 <form method="post" action="index.cgi">
 <p>Season: <input type="text" name="season_code"/></p>
 <p>Game: <input type="text" name="gamenum"/></p>
 <p> <input type="submit" value="Submit"/> </p>
 </form>
""")
# getfirst() always yields one string; getvalue() returns a list when a field
# is submitted more than once, which would break .zfill() below.
season_code = form.getfirst("season_code", "00214")
gamenum = form.getfirst("gamenum", "1").zfill(5)
gameid = season_code + gamenum
query = urlparse.parse_qs(urlparse.urlparse(testurl).query)

# Load appropriate JSON file, print to HTML table
json_path = "/var/www/data/json"
if gameid.isdigit():
    # gameid comes from the request and becomes part of a file name, so it is
    # only used once it is known to consist of digits.
    with open(json_path + "/pbp_" + gameid + ".json", "r") as json_file:
        j = json.load(json_file)
    df = DataFrame(j["rowSet"])
    df.columns = j["headers"]
    # Keep only the three description columns and give them display names.
    df2 = DataFrame([df.HOMEDESCRIPTION, df.NEUTRALDESCRIPTION, df.VISITORDESCRIPTION]).T
    vs2 = ["Home", "", "Away"]
    df2.columns = vs2
    print(df2.to_html())
else:
    print("<p>Invalid season or game number.</p>")

print("</body></html>")
Example #11
0
def showT(data, rows=None):
    """Build a DataFrame from *data* and pass its HTML rendering to ``htprint``.

    When *rows* is truthy the frame is sliced with ``[:rows]`` first; a falsy
    value (``None`` or ``0``) leaves the frame unsliced.
    """
    table = DataFrame(data)
    shown = table[:rows] if rows else table
    htprint(shown.to_html())
Example #12
0
class PRCounter(object):
    """Table of precision/recall/F counts, one row per key.

    Rows are added with ``counter[key] = (gold_set, pred_set)`` or
    ``counter[key] = (N, gold_set, pred_set)`` and stored in a DataFrame whose
    columns are ``COLUMNS``.  Two counters are combined with ``+``, which sums
    the counts row by row and recomputes the ratio columns.

    NOTE(review): ``DataFrame.from_items`` and ``DataFrame.append`` are used
    below; both were removed in later pandas releases (1.0 and 2.0), so this
    class needs a pandas version that still provides them.
    """

    # Column order of the underlying DataFrame.
    COLUMNS = ["Numer", "PDenom", "RDenom", "P", "R", "F", "T", "N", "Acc"]

    def __init__(self):
        """Create a counter with no rows."""
        self._df = DataFrame(columns=PRCounter.COLUMNS)

    def __setitem__(self, k, v):
        """Append a row labelled *k* computed from *v*.

        *v* is ``(N, gold_set, pred_set)`` when its first item is an ``int``,
        otherwise ``(gold_set, pred_set)``.  Without ``N`` the ``N`` column is
        stored as ``""`` and ``T``/``Acc`` as ``None``.

        Raises ``AssertionError`` if ``N`` is not positive while either set is
        non-empty, or if the computed ``T`` is negative.
        """
        if isinstance(v[0], int):
            N, gold_set, pred_set = v
            if gold_set or pred_set:
                assert N > 0, (N, gold_set, pred_set)
        else:
            gold_set, pred_set = v
            N = ""
        entry = {"Numer": len(gold_set & pred_set), "PDenom": len(pred_set), "RDenom": len(gold_set), "N": N}
        # A zero denominator (or zero P + R) yields NaN instead of raising.
        entry["P"] = entry["Numer"] / entry["PDenom"] if entry["PDenom"] else float("nan")
        entry["R"] = entry["Numer"] / entry["RDenom"] if entry["RDenom"] else float("nan")
        entry["F"] = (
            2 * entry["P"] * entry["R"] / (entry["P"] + entry["R"]) if (entry["P"] + entry["R"]) else float("nan")
        )
        if N == "":
            entry["T"] = None
            entry["Acc"] = None
        else:
            if len(gold_set) == len(pred_set) == N:
                entry["T"] = entry["Numer"]
            else:
                # everything out of N that is neither a false positive nor a
                # false negative
                fp = len(pred_set - gold_set)
                fn = len(gold_set - pred_set)
                entry["T"] = N - fp - fn
            assert entry["T"] >= 0, (entry, gold_set, pred_set)
            entry["Acc"] = float("nan") if N == 0 else entry["T"] / N
        df = DataFrame.from_items([(e, {k: entry[e]}) for e in PRCounter.COLUMNS])
        self._df = self._df.append(df)

    def __str__(self):
        """Return ``str()`` of the underlying DataFrame."""
        return str(self._df)

    @staticmethod
    def _pad_rows(target, source, present):
        """Return *target* plus a placeholder row for each *source* row missing from *present*.

        *present* is the index the membership test is made against.  A
        placeholder cell is ``""`` where the corresponding *source* cell is
        ``""`` and ``0`` otherwise.
        """
        for row in source.index:
            if row not in present:
                target = target.append(
                    DataFrame.from_items([(e, {row: "" if source[e][row] == "" else 0}) for e in PRCounter.COLUMNS])
                )
        return target

    def __add__(self, that):
        """Return a new counter holding the row-wise sum of ``self`` and *that*.

        Neither operand's DataFrame is replaced.  After summing, ``P``, ``R``,
        ``F`` and ``Acc`` are recomputed from the summed counts.
        """
        # ensure all rows are present for both tables, filling in 0 if necessary
        # (otherwise the empty rows will be treated as if they contain NaN when adding)
        you = PRCounter._pad_rows(that._df, self._df, that._df.index)
        me = PRCounter._pad_rows(self._df, you, self._df.index)

        # add counts
        new_df = me + you

        # recompute ratios
        new_df["P"] = new_df["Numer"] / new_df["PDenom"]
        new_df["R"] = new_df["Numer"] / new_df["RDenom"]
        denom = new_df["P"] + new_df["R"]
        # rows whose denominator is not positive drop out of the divisor and
        # therefore come out as NaN after index alignment
        new_df["F"] = 2 * new_df["P"] * new_df["R"] / denom[denom > 0]
        new_df["Acc"] = new_df["T"] / new_df["N"]
        result = PRCounter()
        result._df = new_df
        return result

    def to_string(self, *args, **kwargs):
        """Forward to ``DataFrame.to_string`` of the underlying table."""
        return self._df.to_string(*args, **kwargs)

    def to_html(self, *args, **kwargs):
        """Forward to ``DataFrame.to_html`` of the underlying table."""
        return self._df.to_html(*args, **kwargs)

    def to_csv(self, *args, **kwargs):
        """Forward to ``DataFrame.to_csv`` of the underlying table."""
        return self._df.to_csv(*args, **kwargs)
Example #13
0
class PRCounter(object):
    """Table of (optionally weighted) precision/recall/F counts, one row per key.

    Rows are added with ``counter[key] = (gold, pred)`` or
    ``counter[key] = (N, gold_set, pred_set)`` and stored in a DataFrame whose
    columns are ``COLUMNS``.  Two counters are combined with ``+``, which sums
    the cells row by row; the ratio columns are refreshed by
    ``compute_ratios()``.

    NOTE(review): ``DataFrame.from_items`` and ``DataFrame.append`` are used
    below; both were removed in later pandas releases (1.0 and 2.0), so this
    class needs a pandas version that still provides them.
    """

    # Column order of the underlying DataFrame.
    COLUMNS = ["Numer", "PDenom", "RDenom", "P", "R", "F", "T", "N", "Acc"]
    # if False, the division (compute_ratios()) will be deferred in case
    # denominators are initially 0
    COMPUTE_RATIOS_ON_ADD = False

    def __init__(self):
        """Create a counter with no rows."""
        self._df = DataFrame(columns=PRCounter.COLUMNS)

    def __setitem__(self, k, v):
        """Append a row labelled *k* computed from *v*.

        *v* is ``(N, gold_set, pred_set)`` when its first item is an ``int``.
        Otherwise it is ``(gold, pred)``, where each side is either used as
        given or, if it is a ``dict``, contributes its keys as the elements
        and its values as per-element points.  Elements without points count
        as 1.  Without ``N`` the ``N`` column is stored as ``""`` and
        ``T``/``Acc`` as ``None``.

        Raises ``AssertionError`` if ``N`` is not positive while either set is
        non-empty, if an element present in both dicts has different points,
        or if the computed ``T`` is negative.
        """
        points = {}
        if isinstance(v[0], int):
            N, gold_set, pred_set = v
            if gold_set or pred_set:
                assert N > 0, (N, gold_set, pred_set)
        else:
            N = ""
            gold, pred = v
            pred_set = set(pred.keys()) if isinstance(pred, dict) else pred
            gold_set = set(gold.keys()) if isinstance(gold, dict) else gold
            if isinstance(gold, dict):
                points.update(gold)
                if isinstance(pred, dict):
                    # both sides carry points: they must agree on shared elements
                    for elt in gold_set & pred_set:
                        assert gold[elt] == pred[elt], (elt, gold[elt], pred[elt])
            if isinstance(pred, dict):
                points.update(pred)

        entry = {
            "Numer": sum(points.get(elt, 1) for elt in gold_set & pred_set),
            "PDenom": sum(points.get(elt, 1) for elt in pred_set),
            "RDenom": sum(points.get(elt, 1) for elt in gold_set),
            "N": N,
        }
        # A zero denominator (or zero P + R) yields NaN instead of raising.
        entry["P"] = entry["Numer"] / entry["PDenom"] if entry["PDenom"] else float("nan")
        entry["R"] = entry["Numer"] / entry["RDenom"] if entry["RDenom"] else float("nan")
        entry["F"] = (
            2 * entry["P"] * entry["R"] / (entry["P"] + entry["R"]) if (entry["P"] + entry["R"]) else float("nan")
        )
        if N == "":
            entry["T"] = None
            entry["Acc"] = None
        else:
            if len(gold_set) == len(pred_set) == N:
                entry["T"] = entry["Numer"]
            else:
                # everything out of N that is neither a false positive nor a
                # false negative
                fp = len(pred_set - gold_set)
                fn = len(gold_set - pred_set)
                entry["T"] = N - fp - fn
            assert entry["T"] >= 0, (entry, gold_set, pred_set)
            entry["Acc"] = float("nan") if N == 0 else entry["T"] / N
        df = DataFrame.from_items([(e, {k: entry[e]}) for e in PRCounter.COLUMNS])
        self._df = self._df.append(df)

    def __str__(self):
        """Return ``str()`` of the underlying DataFrame."""
        return str(self._df)

    @staticmethod
    def _pad_rows(target, source, present):
        """Return *target* plus a placeholder row for each *source* row missing from *present*.

        *present* is the index the membership test is made against.  A
        placeholder cell is ``""`` where the corresponding *source* cell is
        ``""`` and ``0`` otherwise.
        """
        for row in source.index:
            if row not in present:
                target = target.append(
                    DataFrame.from_items([(e, {row: "" if source[e][row] == "" else 0}) for e in PRCounter.COLUMNS])
                )
        return target

    def __add__(self, that):
        """Return a new counter holding the row-wise sum of ``self`` and *that*.

        Neither operand is modified.  When ``COMPUTE_RATIOS_ON_ADD`` is true
        the ratio columns of the returned counter are recomputed from the
        summed counts; otherwise call ``compute_ratios()`` on the result.
        """
        # ensure all rows are present for both tables, filling in 0 if necessary
        # (otherwise the empty rows will be treated as if they contain NaN when adding)
        you = PRCounter._pad_rows(that._df, self._df, that._df.index)
        me = PRCounter._pad_rows(self._df, you, self._df.index)

        # add counts
        new_df = me + you

        result = PRCounter()
        result._df = new_df
        if self.COMPUTE_RATIOS_ON_ADD:  # recompute ratios
            # The ratios belong to the freshly summed table, not to the left
            # operand, so recompute them on the result.
            result.compute_ratios()
        return result

    def compute_ratios(self):
        """Recompute ``P``, ``R``, ``F`` and ``Acc`` in place from the count columns.

        Zero entries in ``PDenom`` and ``RDenom`` are replaced by NaN first, so
        the corresponding ratios come out as NaN.  ``F`` is also NaN wherever
        ``P + R`` is not positive.
        """
        df = self._df

        # if denominators are 0, set them to NaN (whole column at once, so the
        # result does not depend on how the rows are labelled)
        for c in ("PDenom", "RDenom"):
            df[c] = df[c].where(df[c] != 0)

        df["P"] = df["Numer"] / df["PDenom"]
        df["R"] = df["Numer"] / df["RDenom"]
        denom = df["P"] + df["R"]
        # rows whose denominator is not positive drop out of the divisor and
        # therefore come out as NaN after index alignment
        df["F"] = 2 * df["P"] * df["R"] / denom[denom > 0]
        df["Acc"] = df["T"] / df["N"]

    def to_string(self, *args, **kwargs):
        """Forward to ``DataFrame.to_string`` of the underlying table."""
        return self._df.to_string(*args, **kwargs)

    def to_html(self, *args, **kwargs):
        """Forward to ``DataFrame.to_html`` of the underlying table."""
        return self._df.to_html(*args, **kwargs)

    def to_csv(self, *args, **kwargs):
        """Forward to ``DataFrame.to_csv`` of the underlying table."""
        return self._df.to_csv(*args, **kwargs)