def test_update_and_modify_columns_fractured(conn):
    """
    Store rows for trend set A, then store an overlapping subset B for the
    same timestamp, and verify that both the 'modified' timestamp and the
    'Drops' value of the stored rows change.
    """
    curr_timezone = timezone("Europe/Amsterdam")
    granularity = create_granularity("900")
    timestamp = curr_timezone.localize(datetime(2013, 1, 2, 10, 45, 0))
    entity_ids = range(1023, 1023 + 100)

    trend_names_a = ["CellID", "CCR", "Drops"]
    data_rows_a = [(i, ("10023", "0.9919", "17")) for i in entity_ids]
    data_types_a = extract_data_types(data_rows_a)

    trend_names_b = ["CellID", "Drops"]
    data_rows_b = [(i, ("10023", "19")) for i in entity_ids]

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")
        trendstore = TrendStore(datasource, entitytype, granularity, 86400,
                "table").create(cursor)
        partition = trendstore.partition(timestamp)
        table = partition.table()
        partition.create(cursor)
        partition.check_columns_exist(trend_names_a, data_types_a)(cursor)
        conn.commit()

    store(conn, SCHEMA, table.name, trend_names_a, timestamp, data_rows_a)

    time.sleep(0.2)

    # Materialize the columns: a bare `map` iterator would be exhausted by
    # the first select, leaving the second select with no columns (Python 3).
    check_columns = list(map(Column, ["modified", "Drops"]))

    query = table.select(check_columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        row_before = cursor.fetchone()

    store(conn, SCHEMA, table.name, trend_names_b, timestamp, data_rows_b)

    query = table.select(check_columns)

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        row_after = cursor.fetchone()

    assert_not_equal(row_before[0], row_after[0])
    assert_not_equal(row_before[1], row_after[1])
def deduce_data_types(self):
    """
    Return the minimal datatypes needed to hold this datapackage's values,
    ordered like the values themselves and therefore like attribute_names.
    """
    return extract_data_types(self.rows)
def store(conn, schema, table_name, trend_names, timestamp, data_rows,
        sub_query=None):
    """
    Store *data_rows* in trend table *schema*.*table_name*.

    Starts with an insert strategy; on a unique-constraint violation it
    switches to an update strategy and retries.  A RecoverableError is
    repaired via its attached fix and retried.  At most MAX_RETRIES
    attempts are made before MaxRetriesError is raised.

    :param sub_query: optional sub-query used to delete destination rows
        up front so UPDATEs can be avoided.
    """
    retry = True
    attempt = 0
    store_fn = store_insert

    while retry:
        retry = False
        attempt += 1

        if attempt > MAX_RETRIES:
            raise MaxRetriesError("Max retries ({0}) reached".format(
                MAX_RETRIES))

        with closing(conn.cursor()) as cursor:
            modified = get_timestamp(cursor)

        try:
            # Delete destination data if possible to prevent UPDATEs
            if sub_query:
                required = set(
                    ["entity_id", "timestamp", "modified"] + trend_names)

                # Use the schema parameter (not the global SCHEMA) so the
                # column check targets the same table that is written to.
                if required == set(get_column_names(conn, schema, table_name)):
                    try:
                        delete_by_sub_query(conn, table_name, timestamp,
                                sub_query)
                    except NoSuchTable as exc:
                        data_types = extract_data_types(data_rows)
                        fix = partial(create_trend_table, conn, schema,
                                table_name, trend_names, data_types)
                        raise RecoverableError(str(exc), fix)

            store_fn(conn, schema, table_name, trend_names, timestamp,
                    modified, data_rows)

            with closing(conn.cursor()) as cursor:
                mark_modified(cursor, schema, table_name, timestamp, modified)
        except UniqueViolation:
            # Rows already exist; switch to the update strategy and retry.
            try:
                conn.rollback()
            except psycopg2.InterfaceError as exc:
                logging.debug(exc)

            store_fn = store_update
            retry = True
        except RecoverableError as err:
            try:
                conn.rollback()
            except psycopg2.InterfaceError as exc:
                logging.debug(exc)

            logging.debug(str(err))
            err.fix()
            retry = True
        else:
            conn.commit()
def store_datapackage(cursor, trendstore, datapackage, modified):
    """
    Write *datapackage* into the matching partition of *trendstore* and
    return that partition.
    """
    column_types = extract_data_types(datapackage.rows)

    partition = trendstore.partition(datapackage.timestamp)
    partition.create(cursor)
    partition.check_columns_exist(datapackage.trend_names, column_types)(cursor)

    store_copy_from(cursor, partition.table(), datapackage, modified)

    return partition
def store_insert(conn, schema, table_name, trend_names, timestamp, modified,
        data_rows):
    """
    Insert *data_rows*, using batch insert for small sets and COPY FROM for
    large ones.  Known failure modes are re-raised as RecoverableError with
    an attached fix callable.
    """
    use_copy = len(data_rows) > LARGE_BATCH_THRESHOLD

    try:
        if use_copy:
            store_copy_from(conn, schema, table_name, trend_names,
                    timestamp, modified, data_rows)
        else:
            store_batch_insert(conn, schema, table_name, trend_names,
                    timestamp, modified, data_rows)
    except DataTypeMismatch as exc:
        column_types = extract_data_types(data_rows)
        raise RecoverableError(str(exc), partial(check_column_types, conn,
                schema, table_name, trend_names, column_types))
    except NoSuchTable as exc:
        column_types = extract_data_types(data_rows)
        raise RecoverableError(str(exc), partial(create_trend_table, conn,
                schema, table_name, trend_names, column_types))
    except NoSuchColumnError as exc:
        raise RecoverableError(str(exc), partial(check_columns_exist, conn,
                schema, table_name, trend_names))
def test_store_copy_from_1(conn):
    """Copy eleven rows into a fresh partition; check count and modified."""
    trend_names = ['CellID', 'CCR', 'CCRatts', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '2105', '17')),
        (10047, ('10047', '0.9963', '4906', '18')),
        (10048, ('10048', '0.9935', '2448', '16')),
        (10049, ('10049', '0.9939', '5271', '32')),
        (10050, ('10050', '0.9940', '3693', '22')),
        (10051, ('10051', '0.9944', '3753', '21')),
        (10052, ('10052', '0.9889', '2168', '24')),
        (10053, ('10053', '0.9920', '2372', '19')),
        (10085, ('10085', '0.9987', '2282', '3')),
        (10086, ('10086', '0.9972', '1763', '5')),
        (10087, ('10087', '0.9931', '1453', '10'))
    ]

    amsterdam = timezone("Europe/Amsterdam")
    timestamp = amsterdam.localize(datetime(2013, 1, 2, 10, 45, 0))
    modified = amsterdam.localize(datetime.now())
    granularity = create_granularity("900")
    data_types = extract_data_types(data_rows)

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(datasource, entitytype, granularity, 86400,
                "table").create(cursor)
        partition = trendstore.partition(timestamp)
        table = partition.table()
        partition.create(cursor)
        partition.check_columns_exist(trend_names, data_types)(cursor)

        store_copy_from(conn, SCHEMA, table.name, trend_names, timestamp,
                modified, data_rows)
        conn.commit()

        eq_(row_count(cursor, table), 11)

        table.select(Call("max", Column("modified"))).execute(cursor)
        max_modified = first(cursor.fetchone())
        eq_(max_modified, modified)
def test_update_modified_column(conn):
    """
    Re-store one row after a delay and verify its 'modified' value differs
    from the untouched row's, and that the modified-table 'end' tracks the
    maximum 'modified' of the trend table.
    """
    amsterdam = timezone("Europe/Amsterdam")
    trend_names = ['CellID', 'CCR', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '17')),
        (10047, ('10047', '0.9963', '18'))
    ]
    update_data_rows = [(10023, ('10023', '0.9919', '17'))]

    data_types = extract_data_types(data_rows)
    timestamp = amsterdam.localize(datetime.now())
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(datasource, entitytype, granularity, 86400,
                "table").create(cursor)
        partition = trendstore.partition(timestamp)
        table = partition.table()
        partition.create(cursor)
        partition.check_columns_exist(trend_names, data_types)(cursor)

        store(conn, SCHEMA, table.name, trend_names, timestamp, data_rows)

        # Ensure the second store gets a measurably later 'modified' stamp.
        time.sleep(1)

        store(conn, SCHEMA, table.name, trend_names, timestamp,
                update_data_rows)
        conn.commit()

        table.select([Column("modified")]).execute(cursor)
        modified_list = [modified for modified in cursor.fetchall()]
        assert_not_equal(modified_list[0], modified_list[1])

        table.select(Call("max", Column("modified"))).execute(cursor)
        max_modified = first(cursor.fetchone())

        modified_table.select(Column("end")).where_(
            Eq(Column("table_name"), table.name)).execute(cursor)
        end = first(cursor.fetchone())

        eq_(end, max_modified)
def test_store_insert_rows(conn):
    """Insert rows at two timestamps; re-inserting must not add duplicates."""
    table = Table(SCHEMA, 'storage_tmp_test_table')
    trend_names = ['CellID', 'CCR', 'Drops']
    data_rows = [
        (10023, ('10023', '0.9919', '17')),
        (10047, ('10047', '0.9963', '18'))
    ]

    amsterdam = timezone("Europe/Amsterdam")
    modified = amsterdam.localize(datetime.now())
    time1 = amsterdam.localize(datetime.now())
    time2 = time1 - timedelta(days=1)
    data_types = extract_data_types(data_rows)

    with closing(conn.cursor()) as cursor:
        table.drop().if_exists().execute(cursor)

        create_trend_table(conn, SCHEMA, table.name, trend_names, data_types)

        store_insert_rows(conn, SCHEMA, table.name, trend_names, time1,
                modified, data_rows)
        conn.commit()
        eq_(row_count(cursor, table), 2)

        store_insert_rows(conn, SCHEMA, table.name, trend_names, time2,
                modified, data_rows)
        conn.commit()
        eq_(row_count(cursor, table), 4)

        # Storing the same rows at time1 again leaves the count unchanged.
        store_insert_rows(conn, SCHEMA, table.name, trend_names, time1,
                modified, data_rows)
        conn.commit()
        eq_(row_count(cursor, table), 4)

        table.select(Call("max", Column("modified"))).execute(cursor)
        max_modified = first(cursor.fetchone())
        eq_(max_modified, modified)
def test_store_using_tmp(conn):
    """
    Store eleven input rows containing one duplicate entity (10023) through
    the tmp-table path; only ten rows must land in the destination table.
    """
    table = Table(SCHEMA, 'storage_tmp_test_table')
    trend_names = ['CellID', 'CCR', 'RadioFail', 'RFOldHo', 'AbisFailCall']
    data_rows = [
        (10023, ('10023', '0.9919', '10', '3', '3')),
        (10047, ('10047', '0.9963', '11', '5', '0')),
        (10048, ('10048', '0.9935', '12', '3', '0')),
        (10049, ('10049', '0.9939', '20', '3', '4')),
        (10050, ('10050', '0.9940', '18', '3', '0')),
        (10051, ('10051', '0.9944', '17', '2', '2')),
        (10052, ('10052', '0.9889', '18', '2', '0')),
        (10053, ('10053', '0.9920', '15', '3', '1')),
        (10023, ('10023', '0.9931', '9', '0', '1')),
        (10085, ('10085', '0.9987', '3', '0', '0')),
        (10086, ('10086', '0.9972', '3', '2', '0'))
    ]
    data_types = extract_data_types(data_rows)

    with closing(conn.cursor()) as cursor:
        table.drop().if_exists().execute(cursor)

        create_trend_table(conn, SCHEMA, table.name, trend_names, data_types)

        amsterdam = timezone("Europe/Amsterdam")
        timestamp = amsterdam.localize(datetime(2013, 1, 2, 10, 45, 0))
        modified = amsterdam.localize(datetime.now())

        store_using_tmp(conn, SCHEMA, table.name, trend_names, timestamp,
                modified, data_rows)
        conn.commit()

        eq_(row_count(cursor, table), 10)

        table.select(Call("max", Column("modified"))).execute(cursor)
        max_modified = first(cursor.fetchone())
        eq_(max_modified, modified)
def store_insert_tmp(conn, tmp_table_name, trend_names, timestamp, modified,
        data_rows):
    """
    Same as store_insert, but for temporary tables.

    Inserts go into the tmp table (schema None); recovery fixes target the
    real table in SCHEMA, whose name follows the "tmp_" prefix.
    """
    table_name = tmp_table_name.split("tmp_")[-1]

    try:
        if len(data_rows) > LARGE_BATCH_THRESHOLD:
            store_copy_from(conn, None, tmp_table_name, trend_names,
                    timestamp, modified, data_rows)
        else:
            store_batch_insert(conn, None, tmp_table_name, trend_names,
                    timestamp, modified, data_rows)
    except DataTypeMismatch as exc:
        column_types = extract_data_types(data_rows)
        raise RecoverableError(str(exc), partial(check_column_types, conn,
                SCHEMA, table_name, trend_names, column_types))
    except NoSuchColumnError as exc:
        raise RecoverableError(str(exc), partial(check_columns_exist, conn,
                SCHEMA, table_name, trend_names))
def test_update(conn):
    """Overwrite one stored row and verify 'modified' and 'CCR' change."""
    trend_names = ["CellID", "CCR", "Drops"]
    data_rows = [
        (10023, ("10023", "0.9919", "17")),
        (10047, ("10047", "0.9963", "18"))
    ]
    update_data_rows = [(10023, ("10023", "0.5555", "17"))]

    data_types = extract_data_types(data_rows)
    timestamp = datetime.now()
    granularity = create_granularity("900")

    with closing(conn.cursor()) as cursor:
        datasource = name_to_datasource(cursor, "test-src009")
        entitytype = name_to_entitytype(cursor, "test-type001")

        trendstore = TrendStore(datasource, entitytype, granularity, 86400,
                "table").create(cursor)
        partition = trendstore.partition(timestamp)
        table = partition.table()
        partition.create(cursor)
        partition.check_columns_exist(trend_names, data_types)(cursor)

    store(conn, SCHEMA, table.name, trend_names, timestamp, data_rows)
    store(conn, SCHEMA, table.name, trend_names, timestamp, update_data_rows)
    conn.commit()

    query = table.select([Column("modified"), Column("CCR")])

    with closing(conn.cursor()) as cursor:
        query.execute(cursor)
        rows = cursor.fetchall()

    assert_not_equal(rows[0][0], rows[1][0])
    assert_not_equal(rows[0][1], rows[1][1])