Example #1
def main():

    logger = get_root_logger()
    get_header(logger, "LOADING PROJECTIONS")

    client = APIClient()

    # grab dataframe shape from a trial run
    data = client.get_data("weekly-projections", "json", "QB")
    test_df = json_normalize(data["Projections"])

    # get DF structure from columns in test_df
    cols = test_df.columns
    df = DataFrame(columns=cols)

    # grab current week
    current_week = test_df.week.values[0]

    # loop through all weeks up to current week
    for wk in [str(x) for x in range(int(current_week))]:
        logger.info("Processing projections for week {0}".format(int(wk) + 1))
        # loop through all positions
        for pos in ["QB", "RB", "WR", "TE", "K", "DEF"]:
            tmp_data = client.get_data("weekly-projections", "json", pos, wk)
            tmp_df = json_normalize(tmp_data["Projections"])
            df = df.append(tmp_df)

    # import this df directly to PG DB
    conn = DBClient()
    conn.load(df, "projections", schema="raw", if_exists="replace")
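
DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the week/position loop above is nowadays usually written to collect the per-request frames in a list and concatenate them once at the end. A minimal sketch of that variant, assuming the same APIClient and json_normalize helpers used above (load_projections_concat is a hypothetical name):

import pandas as pd
from pandas import json_normalize


def load_projections_concat(client, current_week):
    # accumulate one frame per week/position request, then build the
    # final DataFrame with a single concat instead of repeated appends
    frames = []
    for wk in [str(x) for x in range(int(current_week))]:
        for pos in ["QB", "RB", "WR", "TE", "K", "DEF"]:
            tmp_data = client.get_data("weekly-projections", "json", pos, wk)
            frames.append(json_normalize(tmp_data["Projections"]))
    return pd.concat(frames, ignore_index=True)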
Example #2
class Record(object):
    def __init__(self):
        self.trade_history = DataFrame()
        self.position_history = DataFrame()
        self.portfolio_value_history = DataFrame()

    def update_trade(self, date, trade_type, symbol, amount, price):
        newtrade = DataFrame(
            {"Date": [date], "Trade_type": [trade_type], "Symbol": [symbol], "Amount": [amount], "Price": [price]}
        )
        self.trade_history = self.trade_history.append(newtrade, ignore_index=True)

    def update_position(self, date, p):
        newposition = DataFrame(
            {
                "Date": [date],
                "Symbol": [p.symbol],
                "Amount": [p.amount],
                "Avg_price": [p.avg_price],
                "Position_value": [p.position_value],
            }
        )
        self.position_history = self.position_history.append(newposition, ignore_index=True)

    def update_portfolio_value(self, date, port, pos, cash):
        newport = DataFrame({"Date": [date], "Portfolio_value": [port], "Position_value": [pos], "Cash": [cash]})
        self.portfolio_value_history = self.portfolio_value_history.append(newport, ignore_index=True)
Example #3
    def test_append_empty_dataframe(self):

        # Empty df append empty df
        df1 = DataFrame([])
        df2 = DataFrame([])
        result = df1.append(df2)
        expected = df1.copy()
        assert_frame_equal(result, expected)

        # Non-empty df append empty df
        df1 = DataFrame(np.random.randn(5, 2))
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        assert_frame_equal(result, expected)

        # Empty df with columns append empty df
        df1 = DataFrame(columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        assert_frame_equal(result, expected)

        # Non-Empty df with columns append empty df
        df1 = DataFrame(np.random.randn(5, 2), columns=["bar", "foo"])
        df2 = DataFrame()
        result = df1.append(df2)
        expected = df1.copy()
        assert_frame_equal(result, expected)
Example #4
def getIndexChangeRate(startDate, endDate):
    df_result = DataFrame()
    df = ts.get_hist_data("sh", start=startDate, end=endDate).reset_index()
    df["gap"] = df["high"] - df["low"]
    df["gap_rate"] = df["gap"] / df["close"] * 100
    df["mkt"] = "sh"
    df_result = df_result.append(df)

    df = ts.get_hist_data("sz", start=startDate, end=endDate).reset_index()
    df["gap"] = df["high"] - df["low"]
    df["gap_rate"] = df["gap"] / df["close"] * 100
    df["mkt"] = "sz"
    df_result = df_result.append(df)

    df = ts.get_hist_data("zxb", start=startDate, end=endDate).reset_index()
    df["gap"] = df["high"] - df["low"]
    df["gap_rate"] = df["gap"] / df["close"] * 100
    df["mkt"] = "zxb"
    df_result = df_result.append(df)

    df = ts.get_hist_data("cyb", start=startDate, end=endDate).reset_index()
    df["gap"] = df["high"] - df["low"]
    df["gap_rate"] = df["gap"] / df["close"] * 100
    df["mkt"] = "cyb"
    df_result = df_result.append(df)

    fileName = r"D:\stock\index_changeRate_" + startDate + "_" + endDate + ".csv"
    df_result = df_result.loc[:, ["date", "mkt", "close", "volume", "price_change", "p_change", "gap", "gap_rate"]]
    df_result = df_result.sort_index(by="date", ascending=False)
    df_result.to_csv(fileName, index=False)
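
The four blocks above differ only in the market code, so the same result can be produced by looping over the codes and concatenating once. A sketch under the same tushare interface, with get_index_change_rate_concat as a hypothetical name:

import pandas as pd
import tushare as ts


def get_index_change_rate_concat(start_date, end_date, markets=("sh", "sz", "zxb", "cyb")):
    # one pass per market code; collect the frames and concatenate at the end
    frames = []
    for mkt in markets:
        df = ts.get_hist_data(mkt, start=start_date, end=end_date).reset_index()
        df["gap"] = df["high"] - df["low"]
        df["gap_rate"] = df["gap"] / df["close"] * 100
        df["mkt"] = mkt
        frames.append(df)
    return pd.concat(frames, ignore_index=True)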
Example #5
def get_sex_type():
    file_name = "data/info_train.csv"
    y = pd.read_csv(file_name, header=None, index_col=0)
    male_id = y[y[1] < 7].index  # label values below 7 are coded as sex=0 (male), the rest as sex=1
    m = DataFrame([0] * male_id.size, index=male_id, columns=["sex"])
    female_id = y[y[1] > 6].index
    f = DataFrame([1] * female_id.size, index=female_id, columns=["sex"])
    m.append(f).to_csv("data/train_sex.csv")
Example #6
    def test_append(self):
        begin_index = self.frame.index[:5]
        end_index = self.frame.index[5:]

        begin_frame = self.frame.reindex(begin_index)
        end_frame = self.frame.reindex(end_index)

        appended = begin_frame.append(end_frame)
        assert_almost_equal(appended["A"], self.frame["A"])

        del end_frame["A"]
        partial_appended = begin_frame.append(end_frame)
        self.assertIn("A", partial_appended)

        partial_appended = end_frame.append(begin_frame)
        self.assertIn("A", partial_appended)

        # mixed type handling
        appended = self.mixed_frame[:5].append(self.mixed_frame[5:])
        assert_frame_equal(appended, self.mixed_frame)

        # what to test here
        mixed_appended = self.mixed_frame[:5].append(self.frame[5:])
        mixed_appended2 = self.frame[:5].append(self.mixed_frame[5:])

        # all equal except 'foo' column
        assert_frame_equal(
            mixed_appended.reindex(columns=["A", "B", "C", "D"]), mixed_appended2.reindex(columns=["A", "B", "C", "D"])
        )

        # append empty
        empty = DataFrame({})

        appended = self.frame.append(empty)
        assert_frame_equal(self.frame, appended)
        self.assertIsNot(appended, self.frame)

        appended = empty.append(self.frame)
        assert_frame_equal(self.frame, appended)
        self.assertIsNot(appended, self.frame)

        # overlap
        self.assertRaises(ValueError, self.frame.append, self.frame, verify_integrity=True)

        # new columns
        # GH 6129
        df = DataFrame({"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}})
        row = Series([5, 6, 7], index=["a", "b", "c"], name="z")
        expected = DataFrame({"a": {"x": 1, "y": 2, "z": 5}, "b": {"x": 3, "y": 4, "z": 6}, "c": {"z": 7}})
        result = df.append(row)
        assert_frame_equal(result, expected)
Example #7
    def test_append_list_of_series_dicts(self):
        df = DataFrame(np.random.randn(5, 4), columns=["foo", "bar", "baz", "qux"])

        dicts = [x.to_dict() for idx, x in df.iterrows()]

        result = df.append(dicts, ignore_index=True)
        expected = df.append(df, ignore_index=True)
        assert_frame_equal(result, expected)

        # different columns
        dicts = [{"foo": 1, "bar": 2, "baz": 3, "peekaboo": 4}, {"foo": 5, "bar": 6, "baz": 7, "peekaboo": 8}]
        result = df.append(dicts, ignore_index=True)
        expected = df.append(DataFrame(dicts), ignore_index=True)
        assert_frame_equal(result, expected)
Example #8
def import_training_data(fname=None, verbose=False):
    count = 0
    result = DataFrame({"text": [], "class": [], "rumor": [], "event": [], "features": []})
    for event in rumor_terms.event_rumor_map:
        for rumor in rumor_terms.event_rumor_map[event]:
            if verbose:
                print("processing data from %s, %s" % (event, rumor))
            pos_examples = [
                x
                for x in client["code_comparison"][rumor].find(
                    {"first_final": {"$in": ["Affirm", "Deny", "Neutral"]}, "second_final": "Uncertainty"}
                )
            ]
            neg_examples = [
                x
                for x in client["code_comparison"][rumor].find(
                    {"first_final": {"$in": ["Affirm", "Deny", "Neutral"]}, "second_final": {"$ne": "Uncertainty"}}
                )
            ]
            examples = pos_examples
            examples += random.sample(neg_examples, len(pos_examples))
            for tweet in examples:
                if tweet["text"]:
                    # full_tweet = get_tweet_meta_data(tweet,event,rumor)
                    features = {}
                    # if full_tweet:
                    #    features['has_mention'] = find_mention(full_tweet['text'])
                    # else:
                    #    features['has_mention'] = False
                    if "?" in tweet["text"]:
                        features["is_question"] = True
                    else:
                        features["is_question"] = False
                    text = process_tweet(tweet, event, rumor)
                    if "Uncertainty" in tweet["second_final"]:
                        classification = 1
                    else:
                        classification = 0
                    result = result.append(
                        DataFrame(
                            {
                                "text": text,
                                "class": classification,
                                "rumor": rumor,
                                "event": event,
                                "features": json.dumps(features),
                            },
                            index=[count],
                        )
                    )
                    count += 1
    result = result.reindex(numpy.random.permutation(result.index))

    if fname:
        fpath = os.path.join(os.path.dirname(__file__), os.pardir, "dicts/") + fname
        f = open(fpath, "wb")
        pickle.dump(result, f)
        f.close()
    if verbose:
        print(result)
    return result
Example #9
    def check_frames_difference(self, actual, expected, sizecol="r"):
        """
        Compare rows of ``actual`` against ``expected`` and collect the
        per-row value differences.

        Rows are matched by their closest x/y position within a tolerance,
        so ordering is unimportant. NaN and infinite values are supported.

        Parameters
        ----------
        actual : pandas.DataFrame
        expected : pandas.DataFrame
        sizecol : str, optional
            Column used to derive the per-row matching tolerance
            (10% of its value, with a floor of 5 pixels).

        Returns
        -------
        unmatched : int
            Number of expected rows with no close match in ``actual``.
        value_diff : pandas.DataFrame
            Per-row differences between matched rows.

        """
        unmatched = 0
        value_diff = DataFrame(columns=expected.columns)

        for i, exp_row in expected.iterrows():
            # tolerance in pixels
            tolerance = max(exp_row[sizecol] * 0.1, 5.0)
            act_row_index = self.find_closest_row_index(exp_row, actual, "x", "y", tolerance)
            if act_row_index is False:
                unmatched += 1
                continue

            act_row = actual.loc[act_row_index]
            diff = exp_row - act_row[expected.columns]
            value_diff = value_diff.append(diff, ignore_index=True)
        return unmatched, value_diff
Example #10
def prep_df(filename, values):
    cnn_ba = DataFrame(read_csv(filename))
    col = cnn_ba.columns
    df_rec = DataFrame(columns=col)
    for i in range(len(cnn_ba)):
        if (
            cnn_ba["style1"][i] in values
            or cnn_ba["style2"][i] in values
            or cnn_ba["style3"][i] in values
            or cnn_ba["style4"][i] in values
            or cnn_ba["style5"][i] in values
        ):

            row = cnn_ba.ix[i]
            df_rec = df_rec.append(row)

    # add column with bare ID:
    baseID = []
    a = df_rec["ID"].reset_index()

    for i in range(len(df_rec)):
        b = re.search(r"\w+\_", a["ID"][i])
        c = b.group().strip("_")
        baseID.append(c)

    df_rec["baseID"] = baseID
    return df_rec
Example #11
def pickle_from_db(event_list, fname, verbose=False):
    for event in event_list:
        result = DataFrame({"text": [], "event": [], "features": [], "unique_id": [], "raw_text": []})
        count = 0
        if verbose:
            print("processing data from %s" % (event))
        examples = client[insert_db][event].find()
        for tweet in examples:
            if verbose and count % 1000 == 0 and count != 0:
                print("processed %s tweets" % count)
            if tweet["text"]:
                result = result.append(
                    DataFrame(
                        {
                            "text": tweet["text"],
                            "event": event,
                            "features": json.dumps(tweet["features"]),
                            "unique_id": tweet["unique_id"],
                            "raw_text": tweet["raw_text"],
                        },
                        index=[count],
                    )
                )
                count += 1
                if count == 50:
                    break
        result = result.reindex(numpy.random.permutation(result.index))

        fpath = os.path.join(os.path.dirname(__file__), os.pardir, "dicts/") + event + "_" + fname
        f = open(fpath, "wb")
        pickle.dump(result, f)
        f.close()
        if verbose:
            print(result)
            print("dumped %s tweets" % len(result))
Example #12
def feature_profile(signal_key, base_key):
    """
    This function takes one key from each signal and base respectively and 
    creates the profile array for this feature combination.
    """
    global window_size
    collect = DataFrame()
    # collects the vector for each interval which holds the mean values
    # for each window

    count = 0
    for interval in db[base_key]:

        # check that interval is not too short
        bin_size = int(interval.length * window_size)  # round to floor
        if bin_size < 1:  # see comment below
            continue

        positions = get_window_positions(interval.start, interval.length)

        vector = []
        for p in positions:
            iv = (interval.chrom, p, p + bin_size)
            v = list(ds[signal_key][GenomicInterval(*iv)].values())
            vector.append(fun(v))  # vector is of type list

        # collect result
        collect = collect.append(Series(vector), ignore_index=True)
    return collect
Example #13
def handleBi5(infile, fileDataFrame):

    if os.path.getsize(infile) == 0:
        return fileDataFrame

    array = infile.split("/")
    print(array)
    alen = len(array)

    dateWithoutHour = int(datetime(int(array[alen - 4]), int(array[alen - 3]), int(array[alen - 2])).strftime("%s"))
    dateWithoutMilisec = (dateWithoutHour + int(array[alen - 1].split("_")[0].split("h")[0]) * 3600) * 1000
    subprocess.call("xz -dkc --suffix=bi5 " + infile + ">tmp.bin", shell=True)

    hdfDir = "./hdf/" + infile.split("/")[2]
    if not os.path.exists(hdfDir):
        os.makedirs(hdfDir)
    cvsFileName = hdfDir + "/" + infile.split("/")[3]

    if fileDataFrame.empty:
        if os.path.exists(cvsFileName):
            fileDataFrame = read_csv(cvsFileName, index_col=0)
        else:
            fileDataFrame = DataFrame()

    fileDataFrame = fileDataFrame.append(processBinFile("tmp.bin", dateWithoutMilisec))

    print(fileDataFrame.iloc[0])
    return fileDataFrame
Example #14
def masterAssemble(client):
    activities = list(client.get_activities())
    print (len(activities))
    athlete = client.get_athlete()
    # add in name of run

    path = os.path.dirname(__file__)

    try:
        df = pd.read_pickle(str(path) + "/master_dfs/" + str(athlete.id) + "masterDf.txt")
    except IOError:
        df = DataFrame({})

    for i in range(len(activities)):
        if (len(df) == 0) or (float(activities[i].id) not in list(df.activityId)):
            activityId = activities[i].id
            run = client.get_activity_streams(activityId, types=["time", "latlng", "distance", "heartrate", "altitude"])
            latlng = run["latlng"].data
            time = run["time"].data
            distance = run["distance"].data
            heartrate = run["heartrate"].data
            altitude = run["altitude"].data
            date = activities[i].start_date_local
            activity = activityId
            dfi = assemble(date, activityId, heartrate, distance, time, altitude, latlng)
            df = df.append(dfi)
            print (dfi)
    return df
Example #15
def wsjfs_data(symbol):
    coded = urllib.quote(symbol)

    url = (
        "http://ifs.futuresource.com/charts/charts.jsp?cID=WSJ&iFSsymbols=%s&iFScompareTo=&iFSperiod=D&iFSvminutes=&iFSchartsize=800x550&iFSbardensity=LOW&iFSbartype=BAR&iFSstudies=&iFSohlc=true"
        % (coded)
    )
    print(url)

    f = urllib.urlopen(url)
    txt = f.read()
    soup = BeautifulSoup(txt)
    ars = soup.findAll("area")
    data = map(lambda x: x["onmouseover"], ars)
    splitre = re.compile("Date: *([0-9/]*) *Open: *([0-9.]*) *High: *([0-9.]*) *Low: *([0-9.]*) *Close: *([0-9.]*)")
    df = DataFrame()
    for row in data:
        m = splitre.search(row)
        if not m:
            continue
        S = {"date": Series([datetime.strptime(m.group(1), "%m/%d/%Y").date()])}
        for k, v in zip(["open", "high", "low", "close"], [m.group(2), m.group(3), m.group(4), m.group(5)]):
            S[k] = Series([float(v)])
        df = df.append(DataFrame(S), ignore_index=True)
    return df
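
Since each scraped row is parsed independently, the per-row append can also be replaced by collecting plain dicts and constructing the DataFrame once. A sketch assuming the same regex groups as above (rows_to_frame is a hypothetical helper):

from datetime import datetime

from pandas import DataFrame


def rows_to_frame(data, splitre):
    # parse each onmouseover string into a dict and build one DataFrame at the end
    rows = []
    for row in data:
        m = splitre.search(row)
        if not m:
            continue
        rows.append(
            {
                "date": datetime.strptime(m.group(1), "%m/%d/%Y").date(),
                "open": float(m.group(2)),
                "high": float(m.group(3)),
                "low": float(m.group(4)),
                "close": float(m.group(5)),
            }
        )
    return DataFrame(rows)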
Example #16
    def test_append_missing_column_proper_upcast(self):
        df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")})
        df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)})

        appended = df1.append(df2, ignore_index=True)
        self.assertEqual(appended["A"].dtype, "f8")
        self.assertEqual(appended["B"].dtype, "O")
Example #17
def frame_for_id(features, feat_path="out", data_ids=sts.sts12.train_ids, data_dir="STS2012-train"):
    frame = DataFrame()

    for data_id in data_ids:
        data = {}

        for feat_id in features:
            data_id_dir = data_id[9:] if data_id.startswith("surprise.") else data_id
            feat_fn = os.path.join(feat_path, data_dir, data_id_dir, "%s.txt" % feat_id)

            data[feat_id] = series_from_feat(feat_fn)

        new_frame = DataFrame(data)
        new_frame["data_id"] = data_id

        gs_fn = os.path.join(repos_dir, "data", data_dir, "STS.gs.%s.txt" % data_id)

        if os.path.exists(gs_fn):
            new_frame["gs"] = Series(loadtxt(gs_fn))
        else:
            new_frame["gs"] = None

        frame = frame.append(new_frame)

    frame["data_set"] = data_dir

    return frame
Example #18
def read_data(features, feat_path="out"):
    frame = DataFrame()

    for data_path, data_ids in data_paths_and_ids:
        frame = frame.append(frame_for_id(features, feat_path, data_ids, data_path))

    return frame
Example #19
def make_picture(num=10, coefficient=None, function=np.sin):
    if coefficient is None:
        coefficient = [0, 1, 3, 9]
    train_set = create_dataset(num, function)
    df_ws = DataFrame()

    fig = plt.figure()
    for c, m in enumerate(coefficient):
        f, ws = resolve(train_set, m)
        df_ws = df_ws.append(Series(ws, name="M=%d" % m))

        subplot = fig.add_subplot(2, 2, c + 1)
        subplot.set_xlim(-0.05, 1.05)
        subplot.set_ylim(-1.5, 1.5)
        subplot.set_title("M=%d" % m)

        subplot.scatter(train_set.x, train_set.y, marker="o", color="blue")

        linex = np.linspace(0, 1, 101)
        liney = function(2 * np.pi * linex)
        subplot.plot(linex, liney, color="green", linestyle="--")

        linex = np.linspace(0, 1, 101)
        liney = f(linex)
        label = "E(RMS)=%.2f" % rms_error(train_set, f)
        subplot.plot(linex, liney, color="red", label=label)
        subplot.legend(loc=1)

    return fig
Example #20
def create_dataset(num):
    dataset = DataFrame(columns=["x", "y"])
    for i in range(num):
        x = float(i) / float(num - 1)
        y = np.sin(2 * np.pi * x) + normal(scale=0.3)
        dataset = dataset.append(Series([x, y], index=["x", "y"]), ignore_index=True)
    return dataset
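
Because x and y are simple functions of the loop index, the same dataset can be built without a row-by-row append at all. A vectorized sketch under the same column layout (create_dataset_vectorized is a hypothetical name):

import numpy as np
from numpy.random import normal
from pandas import DataFrame


def create_dataset_vectorized(num):
    # build both columns in one shot instead of appending a Series per point
    x = np.arange(num) / float(num - 1)
    y = np.sin(2 * np.pi * x) + normal(scale=0.3, size=num)
    return DataFrame({"x": x, "y": y})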
Example #21
def _extract_data(file_name, filters, fields=None, summary=None, classname="Table", mode="walk", hash=""):
    """
    Not meant for direct use.  This is broken out of :func:`extract_data` so we
    can wrap the code in a caching decorator to speed up loading of data from
    disk.  The hash is created by :func:`extract_data` to ensure that the cache
    is cleared if the last modified time changes.  Note that if you move the
    file to a different folder, this does not clear the cache.
    """
    log.info("... No cached copy of data found, reloading data")
    with tables.openFile(file_name, "r") as h:
        data = DataFrame()
        if mode == "walk":
            iterator = walk_nodes(h.root, filters, classname)
        elif mode == "pattern":
            iterator = p_iter_nodes(h.root, filters)
        else:
            raise ValueError("Unsupported mode {}".format(mode))

        for node in iterator:
            log.info("... Found node %s", node._v_pathname)
            if type(node) == tables.Table:
                frame = extract_node_data(node, fields, summary)
                data = data.append(frame, ignore_index=True)
            else:
                raise NotImplementedError
    return data
Example #22
def convertToPutJson(csv_file):
    df = cleanColumns(read_csv(csv_file))
    putColumns = ["method", "recordId", "body"]
    putDf = DataFrame(columns=putColumns)

    for recordId in df.index:
        print("Converting data for recordId {recordId}...".format(recordId=recordId))
        body = {}

        for col in df.columns:
            body[str(col).strip()] = [str(df[col][recordId]).strip()]

        putDfRow = DataFrame([["PUT", str(recordId), body]], columns=putColumns)
        putDf = putDf.append(putDfRow)

    json_file = sub("csv|txt", "json", csv_file)
    putDf.to_json(json_file, orient="records")

    with open(json_file, "r") as target:
        putData = target.read()

    target = open(json_file, "w")
    putData = putData.replace("},{", "}\n\n{")[1:-1]
    target.write(putData)
    target.close()

    print("Successfully created put data!")
    return json_file
Example #23
def getFeatures(filename):
    csvfile = pd.read_csv(filename)  # Reading .csv files containing tweets.
    tweet_ids = csvfile["id_str"]  # Copying the 'id_str' attribute values to a item.
    length = len(tweet_ids)  # Getting the length of 'tweet_ids'.

    df = DataFrame(d, index=[0])  # create a DataFrame from the feature dict 'd' (defined outside this snippet)

    twitter = Twython(APP_KEY, APP_SECRET, oauth_version=2)
    ACCESS_TOKEN = twitter.obtain_access_token()
    twitter = Twython(APP_KEY, access_token=ACCESS_TOKEN)
    # Generating Access Token

    for i in range(0, length):
        status = twitter.show_status(id=tweet_ids[i])
        d["id"] = status["id_str"].encode("utf-8")
        d["created_at"] = status["created_at"].encode("utf-8")
        d["from_user"] = status["user"]["screen_name"].encode("utf-8")
        d["followers_count"] = status["user"]["followers_count"]
        d["friends_count"] = status["user"]["friends_count"]
        d["statuses_count"] = status["user"]["statuses_count"]
        d["verified"] = status["user"]["verified"]
        d["location"] = 0 if (len(status["user"]["location"].encode("utf-8")) == 0) else 1
        d["text"] = status["text"].encode("utf-8")
        d["retweet_count"] = status["retweet_count"]
        d["favorite_count"] = status["favorite_count"]
        d["hashtag_count"] = len(status["entities"]["hashtags"])
        d["url_count"] = len(status["entities"]["urls"])
        d["mentions_count"] = len(status["entities"]["user_mentions"])
        if len(status["entities"]["urls"]) > 0:
            for x in range(0, len(status["entities"]["urls"])):
                d["links"] += status["entities"]["urls"][x]["expanded_url"].encode("utf-8") + "  "
        df = df.append(d, ignore_index=True)
        df.to_csv("NSamples.csv")  # Saving file to disk
        d["links"] = ""
    print("\nAll Done!")
Example #24
    def predict(self, tree):
        """
        TODO Should take an array and predict every item. A score can be stored.
        It would follow the guidelines set by scikit-learn.
        """
        tree_rules = self.extract_rules(tree)
        df = DataFrame(columns=["label", "prob"])
        gb = self.posteriori.groupby("label")

        for key, indexes in gb.groups.items():
            apriori_prob = self.apriori[self.apriori.label == key]["freq"].values[0]
            prob = apriori_prob

            group_df, missing_prob = self.apply_smoothing(self.posteriori.ix[indexes], tree_rules)

            for rule in tree_rules:
                prob_evidence = group_df[group_df.rule == rule]["freq"]
                if len(prob_evidence) == 0:
                    prob_evidence = missing_prob
                else:
                    prob_evidence = prob_evidence.values[0]
                prob *= prob_evidence

            post = DataFrame({"label": [key], "prob": [prob]})
            df = df.append(post)

        df.index = np.arange(df.index.size)
        df = df.sort(columns="prob", ascending=False)
        return df.ix[df["prob"].idxmax()]
Example #25
    def test_append_length0_frame(self):
        df = DataFrame(columns=["A", "B", "C"])
        df3 = DataFrame(index=[0, 1], columns=["A", "B"])
        df5 = df.append(df3)

        expected = DataFrame(index=[0, 1], columns=["A", "B", "C"])
        assert_frame_equal(df5, expected)
Example #26
    def test_crossed_dtypes_weird_corner(self):
        columns = ["A", "B", "C", "D"]
        df1 = DataFrame(
            {
                "A": np.array([1, 2, 3, 4], dtype="f8"),
                "B": np.array([1, 2, 3, 4], dtype="i8"),
                "C": np.array([1, 2, 3, 4], dtype="f8"),
                "D": np.array([1, 2, 3, 4], dtype="i8"),
            },
            columns=columns,
        )

        df2 = DataFrame(
            {
                "A": np.array([1, 2, 3, 4], dtype="i8"),
                "B": np.array([1, 2, 3, 4], dtype="f8"),
                "C": np.array([1, 2, 3, 4], dtype="i8"),
                "D": np.array([1, 2, 3, 4], dtype="f8"),
            },
            columns=columns,
        )

        appended = df1.append(df2, ignore_index=True)
        expected = DataFrame(np.concatenate([df1.values, df2.values], axis=0), columns=columns)
        tm.assert_frame_equal(appended, expected)

        df = DataFrame(np.random.randn(1, 3), index=["a"])
        df2 = DataFrame(np.random.randn(1, 4), index=["b"])
        result = concat([df, df2], keys=["one", "two"], names=["first", "second"])
        self.assertEqual(result.index.names, ("first", "second"))
Example #27
class IPythonNotebookBokehMultiprocessPlotObserver(AbstractParallelObserver):
    __name__ = "IPython Notebook Bokeh Multiprocess Plot Observer"

    def __init__(self, url="default", color_map={}, *args, **kwargs):
        super(IPythonNotebookBokehMultiprocessPlotObserver, self).__init__(*args, **kwargs)
        self.url = url
        self.plotted = False
        self.connections = {}
        self.color_map = color_map
        self.data_frame = DataFrame(columns=["iteration", "island", "color", "fitness"])

    def _create_client(self, i):
        self.clients[i] = IPythonNotebookBokehMultiprocessPlotObserverClient(queue=self.queue, index=i)

    def start(self):
        self._plot()
        AbstractParallelObserver.start(self)

    def _plot(self):
        self.plotted = True
        self.uuid = uuid1()
        output_notebook(url=config.bokeh_url, docname=str(self.uuid), hide_banner=True)
        self.plot = figure(title="Best solution convergence plot", tools="")
        self.plot.scatter([], [], color=self.color_map, fill_alpha=0.2, size=7)
        self.plot.xaxis.axis_label = "Iteration"
        self.plot.yaxis.axis_label = "Fitness"

        renderer = self.plot.select(dict(type=GlyphRenderer))
        self.ds = renderer[0].data_source
        show(self.plot)

    def _process_message(self, message):
        if not self.plotted:
            self._plot()

        index = message["index"]
        df = DataFrame(
            {
                "iteration": [message["iteration"]],
                "fitness": [message["fitness"]],
                "color": [self.color_map[index]],
                "island": [index],
            }
        )
        self.data_frame = self.data_frame.append(df, ignore_index=True)
        if message["iteration"] % message["n"] == 0:
            self._update_plot()

    def _update_plot(self):
        self.ds.data["x"] = self.data_frame["iteration"]
        self.ds.data["y"] = self.data_frame["fitness"]
        self.ds.data["fill_color"] = self.data_frame["color"]
        self.ds.data["line_color"] = self.data_frame["color"]
        self.ds._dirty = True
        cursession().store_objects(self.ds)

    def stop(self):
        self.data_frame = DataFrame(columns=["iteration", "island", "color", "fitness"])
        self.plotted = False
Example #28
    def update(self, today=datetime.today()):
        df = self.data()
        final = False
        while not final:
            if "date" in df.columns:
                maxdate = max(df["date"])
            else:
                maxdate = max(df.index)
            final = self.isfinal(maxdate)
            if not final:
                print("%s is not final, stripping" % maxdate)
                if "date" in df.columns:
                    df = df[df["date"] != maxdate]
                else:
                    df = df.reindex(df.index - [maxdate])
        print("maxdate = %s, today = %s" % (maxdate, today))
        newdf = DataFrame()
        if self.chunktype == "YEAR":
            for y in range(maxdate.year, today.year + 1):
                print("performing update for %d" % (y))
                updf = self._updateyear(y)
                print(updf[-3:])
                newdf = newdf.append(updf, ignore_index="date" in df.columns)
        elif self.chunktype == "DAY":
            start = maxdate + timedelta(days=1)
            start = datetime(*(start.timetuple()[:6]))
            dr = DateRange(start, today)
            for d in dr:
                if d == datetime(2011, 12, 26) or d == datetime(2012, 1, 2):
                    continue
                print("performing update for %s" % (d))
                updf = self._updateday(d.date())
                print(updf[-3:])
                newdf = newdf.append(updf, ignore_index="date" in df.columns)
        else:
            raise NameError("unknown chunktype " + self.chunktype)

        if "date" in df.columns:
            newdf = newdf[newdf["date"] > maxdate]
        else:
            print(newdf.index[-3:])
            newindex = [d for d in newdf.index if d > maxdate]
            print("fetched %d rows, %d rows more recent than maxdate = %s" % (len(newdf), len(newindex), maxdate))
            newdf = newdf.reindex(newindex)
        print("end of new data: %s" % (newdf[-3:]))
        self._cache = df.append(newdf, ignore_index="date" in df.columns)
Example #29
    def getDateTimeSeries(self, instrument=None):
        if instrument is None:
            __dateTime = DataFrame()
            for element in self.__instrument:
                __dateTime = __dateTime.append(self.__feed[element].getPriceDataSeries().getDateTimes())
            __dateTime = __dateTime.drop_duplicates([0])
            return __dateTime.values  # a 2-D array is returned at this point
        return self.__feed[instrument].getPriceDataSeries().getDateTimes()
Example #30
    def set_targets_from_file(self, filename=None, year=None):
        """
        Loads targets from file and displays them in the frame
        """

        if year is None:
            year = str(CONF.get("simulation", "datesim").year)

        if filename is None:
            fname = "actualisation_groups.h5"
            data_dir = CONF.get("paths", "data_dir")
            filename = os.path.join(data_dir, fname)

        store = HDFStore(filename)

        # Builds openfisca variables from irpp declaration variables
        df_c = store["corresp"]
        of_vars = dict()
        for col in df_c.columns:
            of_vars[col] = list(df_c[col].dropna().unique())

        df_a = store["amounts"]
        df_b = store["benef"]
        store.close()

        df_a1 = DataFrame({"amount": df_a[year]})

        df_a = DataFrame(columns=["amount"])

        for of_var, declar_vars_list in of_vars.items():
            amount = 0
            for case in declar_vars_list:
                a = df_a1.get_value(case, "amount")
                if a is not NaN:
                    amount += a
            df_a1.drop(declar_vars_list, axis=0, inplace=True)
            row = DataFrame(dict(amount=[amount]), index=[of_var])
            df_a = df_a.append(row)

        df_a = df_a.append(df_a1)

        self.vars_df = df_a
        self.vars_df.index.names = ["var"]
        self.fill_vars()
        self.fill_coeffs()