def dates(args):
    """Run the date-related action selected by ``args.action``.

    Depending on the action this prints the maximum datetimes, the minimum
    datetimes or the start/end points stored in the database, or plots
    either the start/end points or all points.  Nothing happens when
    ``args.action`` matches none of the known actions.

    Args:
        args: Parsed command-line arguments.  The attributes read are
            ``action``, ``database_file``, ``range``, ``threshold`` and,
            for plotting, ``all`` and ``use_file``.  A truthy ``range`` is
            replaced in place by the list obtained from splitting it on
            ``"--"``.
    """
    if args.range:
        # Stored back on ``args`` (as before) so later readers of
        # ``args.range`` see the split list rather than the raw string.
        args.range = args.range.split("--")

    if args.action == print_max_datetimes:
        DatasetDatabase(args.database_file).connect().print_max_date_times()
    elif args.action == print_min_datetimes:
        DatasetDatabase(args.database_file).connect().print_min_date_times()
    elif args.action == print_start_end_datetimes:
        DatasetDatabase(args.database_file).connect().print_start_end_points(
            range=args.range, point_threshold=args.threshold)
    elif args.action == plot_dates:
        database = DatasetDatabase(args.database_file).connect()
        # Both plot variants share the same fetch -> sort -> plot pipeline;
        # only the query method and the plotting routine differ.
        if args.all:
            fetch = database.get_all_points
            plot = DatasetPlotter.plot_all_points
        else:
            fetch = database.get_start_end_points
            plot = DatasetPlotter.plot_start_end_points
        point_dic = fetch(range=args.range, use_file=args.use_file,
                          point_threshold=args.threshold)
        # Only the values are plotted; they are ordered by the sum
        # (concatenation, if they are strings) of first and last element.
        plot(sorted(point_dic.values(), key=lambda x: x[0] + x[-1]))
def dates(args):
    """Run the date-related action selected by ``args.action``.

    Depending on the action this prints the maximum datetimes, the minimum
    datetimes or the start/end points stored in the database, or plots
    either the start/end points or all points.  Nothing happens when
    ``args.action`` matches none of the known actions.

    Args:
        args: Parsed command-line arguments.  The attributes read are
            ``action``, ``database_file``, ``range``, ``threshold`` and,
            for plotting, ``all`` and ``use_file``.  A truthy ``range`` is
            replaced in place by the list obtained from splitting it on
            ``"--"``.
    """
    if args.range:
        # Stored back on ``args`` (as before) so later readers of
        # ``args.range`` see the split list rather than the raw string.
        args.range = args.range.split("--")

    if args.action == print_max_datetimes:
        DatasetDatabase(args.database_file).connect().print_max_date_times()
    elif args.action == print_min_datetimes:
        DatasetDatabase(args.database_file).connect().print_min_date_times()
    elif args.action == print_start_end_datetimes:
        DatasetDatabase(args.database_file).connect().print_start_end_points(
            range=args.range, point_threshold=args.threshold)
    elif args.action == plot_dates:
        database = DatasetDatabase(args.database_file).connect()
        # Both plot variants share the same fetch -> sort -> plot pipeline;
        # only the query method and the plotting routine differ.
        if args.all:
            fetch = database.get_all_points
            plot = DatasetPlotter.plot_all_points
        else:
            fetch = database.get_start_end_points
            plot = DatasetPlotter.plot_start_end_points
        point_dic = fetch(range=args.range, use_file=args.use_file,
                          point_threshold=args.threshold)
        # Only the values are plotted; they are ordered by the sum
        # (concatenation, if they are strings) of first and last element.
        plot(sorted(point_dic.values(), key=lambda x: x[0] + x[-1]))
def test(testfiles):
    """Check the length of every dataset produced by the HDF5 conversion.

    The ``data10000`` fixture is converted to SQLite and from there to
    HDF5.  Each entry of the resulting HDF5 file must then hold as many
    elements as there are whole seconds between the first and the last
    datetime reported by the SQLite database, plus one.

    Args:
        testfiles: Mapping of fixture names to dataset files; only
            ``"data10000"`` is used.
    """
    sqlite_path = "dataset10000.db"
    hdf5_path = "h510000.db"

    # Stage 1: raw dataset -> SQLite.
    DatasetConverter(testfiles["data10000"], sqlite_path).convert()
    # Stage 2: SQLite -> HDF5.
    DatasetDB2HDF5(sqlite_path, hdf5_path).convert()

    database = DatasetDatabase(sqlite_path)
    database.connect()
    start = dt.datetime.strptime(database.get_first_datetime(None),
                                 DATE_FORMAT)
    end = dt.datetime.strptime(database.get_last_datetime(None),
                               DATE_FORMAT)
    database.disconnect()

    # Whole seconds covered by the dataset, both end points included.
    span = end - start
    expected_len = span.days * 3600 * 24 + span.seconds + 1

    with h5py.File(hdf5_path, 'r') as h5_file:
        for series_name in h5_file.keys():
            assert h5_file[series_name].len() == expected_len
def calc(args):
    """Print the time span of the database and the estimated dataset size.

    Reads the first and last datetime and the distinct time-series names
    from the database, then prints the covered range, the time delta, the
    number of points per time series (whole seconds in the range plus
    one), the total number of points over all series and the size in MB
    at 4 bytes per point.

    Args:
        args: Parsed command-line arguments; only ``args.database_file``
            is read.
    """
    db = DatasetDatabase(args.database_file)
    db.connect()
    try:
        first_datetime = dt.datetime.strptime(db.get_first_datetime(None),
                                              DATE_FORMAT)
        last_datetime = dt.datetime.strptime(db.get_last_datetime(None),
                                             DATE_FORMAT)
        ts_names = db.get_distinct_names()

        delta = last_datetime - first_datetime
        # One point per whole second, both end points included.
        pnum = delta.days * 3600 * 24 + delta.seconds + 1
        total_points = pnum * len(ts_names)

        print(first_datetime.strftime("%m/%d/%Y-%H:%M:%S") + " - " +
              last_datetime.strftime("%m/%d/%Y-%H:%M:%S"))
        print("delta: " + str(delta))
        print("points per time series: %d" % pnum)
        print("total points in interpolated dataset: " + str(total_points))
        print("Estimated size (4 bytes per point): %f MB" %
              (total_points * 4.0 / 1024.0 / 1024.0))
    finally:
        # Release the connection even when a query or date parsing fails.
        db.disconnect()
def calc(args):
    """Print the time span of the database and the estimated dataset size.

    Reads the first and last datetime and the distinct time-series names
    from the database, then prints the covered range, the time delta, the
    number of points per time series (whole seconds in the range plus
    one), the total number of points over all series and the size in MB
    at 4 bytes per point.

    Args:
        args: Parsed command-line arguments; only ``args.database_file``
            is read.
    """
    db = DatasetDatabase(args.database_file)
    db.connect()
    try:
        first_datetime = dt.datetime.strptime(db.get_first_datetime(None),
                                              DATE_FORMAT)
        last_datetime = dt.datetime.strptime(db.get_last_datetime(None),
                                             DATE_FORMAT)
        ts_names = db.get_distinct_names()

        delta = last_datetime - first_datetime
        # One point per whole second, both end points included.
        pnum = delta.days * 3600 * 24 + delta.seconds + 1
        total_points = pnum * len(ts_names)

        print(first_datetime.strftime("%m/%d/%Y-%H:%M:%S") + " - " +
              last_datetime.strftime("%m/%d/%Y-%H:%M:%S"))
        print("delta: " + str(delta))
        print("points per time series: %d" % pnum)
        print("total points in interpolated dataset: " + str(total_points))
        print("Estimated size (4 bytes per point): %f MB" %
              (total_points * 4.0 / 1024.0 / 1024.0))
    finally:
        # Release the connection even when a query or date parsing fails.
        db.disconnect()
def test_converter(testfiles):
    """Convert the ``data100`` fixture and check one resulting time series.

    The converted database must contain exactly the three expected rows
    for ``Forex·EURSEK·NoExpiry``.  The database file is removed
    afterwards, also when the conversion or the assertion fails, so a
    failed run does not leave a stale file for the next one.

    Args:
        testfiles: Mapping of fixture names to dataset files; only
            ``"data100"`` is used.
    """
    db_path = "./test_database.db"
    try:
        dc = DatasetConverter(testfiles["data100"], db_path)
        dc.convert()

        db = DatasetDatabase(db_path)
        db.connect()
        try:
            ts = db.get_time_series("Forex·EURSEK·NoExpiry")

            assert ts.fetchall() == [
                ("07/08/2015", "00:05:12", "9.37086666666667", "1.0"),
                ("07/08/2015", "00:05:13", "9.3714", "1.0"),
                ("07/08/2015", "00:05:14", "9.3713", "1.0"),
            ]
        finally:
            # Close before deleting the file, whatever the outcome.
            db.disconnect()
    finally:
        # The file may be missing if the conversion failed early.
        if os.path.exists(db_path):
            os.remove(db_path)
def test_converter(testfiles):
    """Convert the ``data100`` fixture and check one resulting time series.

    The converted database must contain exactly the three expected rows
    for ``Forex·EURSEK·NoExpiry``.  The database file is removed
    afterwards, also when the conversion or the assertion fails, so a
    failed run does not leave a stale file for the next one.

    Args:
        testfiles: Mapping of fixture names to dataset files; only
            ``"data100"`` is used.
    """
    db_path = "./test_database.db"
    try:
        dc = DatasetConverter(testfiles["data100"], db_path)
        dc.convert()

        db = DatasetDatabase(db_path)
        db.connect()
        try:
            ts = db.get_time_series("Forex·EURSEK·NoExpiry")

            assert ts.fetchall() == [
                ("07/08/2015", "00:05:12", "9.37086666666667", "1.0"),
                ("07/08/2015", "00:05:13", "9.3714", "1.0"),
                ("07/08/2015", "00:05:14", "9.3713", "1.0"),
            ]
        finally:
            # Close before deleting the file, whatever the outcome.
            db.disconnect()
    finally:
        # The file may be missing if the conversion failed early.
        if os.path.exists(db_path):
            os.remove(db_path)
def test(testfiles):
    """Check the length of every dataset produced by the HDF5 conversion.

    The ``data10000`` fixture is converted to SQLite and from there to
    HDF5.  Each entry of the resulting HDF5 file must then hold as many
    elements as there are whole seconds between the first and the last
    datetime reported by the SQLite database, plus one.

    Args:
        testfiles: Mapping of fixture names to dataset files; only
            ``"data10000"`` is used.
    """
    sqlite_path = "dataset10000.db"
    hdf5_path = "h510000.db"

    # Stage 1: raw dataset -> SQLite.
    DatasetConverter(testfiles["data10000"], sqlite_path).convert()
    # Stage 2: SQLite -> HDF5.
    DatasetDB2HDF5(sqlite_path, hdf5_path).convert()

    database = DatasetDatabase(sqlite_path)
    database.connect()
    start = dt.datetime.strptime(database.get_first_datetime(None),
                                 DATE_FORMAT)
    end = dt.datetime.strptime(database.get_last_datetime(None),
                               DATE_FORMAT)
    database.disconnect()

    # Whole seconds covered by the dataset, both end points included.
    span = end - start
    expected_len = span.days * 3600 * 24 + span.seconds + 1

    with h5py.File(hdf5_path, 'r') as h5_file:
        for series_name in h5_file.keys():
            assert h5_file[series_name].len() == expected_len
def test_DatasetDatabase():
    """Exercise single-row and multi-row storage in ``DatasetDatabase``.

    Part 1 stores one row with ``store_data`` and reads it back both
    through a raw cursor on ``db.conn`` and through ``get_time_series``.
    Part 2 stores six rows with ``store_multiple_data`` and reads them
    back through ``get_time_series``.  After each part the connection is
    closed and the database file removed, also when an assertion fails,
    so a failed run cannot leave rows behind for the next one.
    """
    test_db_filename = "test_db"

    def cleanup(database):
        # A failed assertion may leave the connection open; close it
        # before deleting the file.
        if database.conn is not None:
            database.disconnect()
        if os.path.exists(test_db_filename):
            os.remove(test_db_filename)

    #
    # Part 1 (insert 1 row)
    #
    db = DatasetDatabase(test_db_filename)
    db.connect()
    try:
        db.store_data("time-series1", 0, "11-11-2015", "19:12:00", 123.4, 1)

        assert isinstance(db.conn, sql.Connection)
        c = db.conn.cursor()
        assert isinstance(c, sql.Cursor)
        c.execute("SELECT * from dataset")
        # The last two fields are expected back as strings although
        # numbers were stored.
        assert c.fetchone() == ("time-series1", 0, "11-11-2015", "19:12:00",
                                "123.4", "1")

        iterator = db.get_time_series("time-series1")
        assert iterator is not None
        # Compare the full result: asserting inside a ``for`` loop would
        # pass without checking anything if no rows were returned.
        assert iterator.fetchall() == [("11-11-2015", "19:12:00", "123.4",
                                        "1")]

        db.disconnect()
        assert db.conn is None
    finally:
        cleanup(db)

    #
    # Part 2 (insert multiple rows)
    #
    db = DatasetDatabase(test_db_filename)
    db.connect()
    try:
        data = [("time-series1", 1, "11-11-2015", "19:12:01", "123.5", "1"),
                ("time-series1", 2, "11-11-2015", "19:12:02", "123.6", "1"),
                ("time-series1", 3, "11-11-2015", "19:12:03", "123.7", "1"),
                ("time-series1", 4, "11-11-2015", "19:12:04", "123.8", "1"),
                ("time-series1", 5, "11-11-2015", "19:12:05", "123.9", "1"),
                ("time-series1", 6, "11-11-2015", "19:12:06", "123.5", "1")]

        db.store_multiple_data(data)

        iterator = db.get_time_series("time-series1")

        assert iterator.fetchall() == [
            ("11-11-2015", "19:12:01", "123.5", "1"),
            ("11-11-2015", "19:12:02", "123.6", "1"),
            ("11-11-2015", "19:12:03", "123.7", "1"),
            ("11-11-2015", "19:12:04", "123.8", "1"),
            ("11-11-2015", "19:12:05", "123.9", "1"),
            ("11-11-2015", "19:12:06", "123.5", "1"),
        ]

        db.disconnect()
        assert db.conn is None
    finally:
        cleanup(db)
def test_DatasetDatabase():
    """Exercise single-row and multi-row storage in ``DatasetDatabase``.

    Part 1 stores one row with ``store_data`` and reads it back both
    through a raw cursor on ``db.conn`` and through ``get_time_series``.
    Part 2 stores six rows with ``store_multiple_data`` and reads them
    back through ``get_time_series``.  After each part the connection is
    closed and the database file removed, also when an assertion fails,
    so a failed run cannot leave rows behind for the next one.
    """
    test_db_filename = "test_db"

    def cleanup(database):
        # A failed assertion may leave the connection open; close it
        # before deleting the file.
        if database.conn is not None:
            database.disconnect()
        if os.path.exists(test_db_filename):
            os.remove(test_db_filename)

    #
    # Part 1 (insert 1 row)
    #
    db = DatasetDatabase(test_db_filename)
    db.connect()
    try:
        db.store_data("time-series1", 0, "11-11-2015", "19:12:00", 123.4, 1)

        assert isinstance(db.conn, sql.Connection)
        c = db.conn.cursor()
        assert isinstance(c, sql.Cursor)
        c.execute("SELECT * from dataset")
        # The last two fields are expected back as strings although
        # numbers were stored.
        assert c.fetchone() == ("time-series1", 0, "11-11-2015", "19:12:00",
                                "123.4", "1")

        iterator = db.get_time_series("time-series1")
        assert iterator is not None
        # Compare the full result: asserting inside a ``for`` loop would
        # pass without checking anything if no rows were returned.
        assert iterator.fetchall() == [("11-11-2015", "19:12:00", "123.4",
                                        "1")]

        db.disconnect()
        assert db.conn is None
    finally:
        cleanup(db)

    #
    # Part 2 (insert multiple rows)
    #
    db = DatasetDatabase(test_db_filename)
    db.connect()
    try:
        data = [("time-series1", 1, "11-11-2015", "19:12:01", "123.5", "1"),
                ("time-series1", 2, "11-11-2015", "19:12:02", "123.6", "1"),
                ("time-series1", 3, "11-11-2015", "19:12:03", "123.7", "1"),
                ("time-series1", 4, "11-11-2015", "19:12:04", "123.8", "1"),
                ("time-series1", 5, "11-11-2015", "19:12:05", "123.9", "1"),
                ("time-series1", 6, "11-11-2015", "19:12:06", "123.5", "1")]

        db.store_multiple_data(data)

        iterator = db.get_time_series("time-series1")

        assert iterator.fetchall() == [
            ("11-11-2015", "19:12:01", "123.5", "1"),
            ("11-11-2015", "19:12:02", "123.6", "1"),
            ("11-11-2015", "19:12:03", "123.7", "1"),
            ("11-11-2015", "19:12:04", "123.8", "1"),
            ("11-11-2015", "19:12:05", "123.9", "1"),
            ("11-11-2015", "19:12:06", "123.5", "1"),
        ]

        db.disconnect()
        assert db.conn is None
    finally:
        cleanup(db)