Example #1
import tensorflow as tf
import tensorframes as tfs
from pyspark.sql import Row, SparkSession


def simple_example_2():
    spark = SparkSession.builder.appName(
        'simple-tensorframes-example-2').getOrCreate()
    spark.sparkContext.setLogLevel('WARN')

    # Build a DataFrame of two-element vectors [y, -y].
    data = [Row(y=[float(y), float(-y)]) for y in range(10)]
    df = spark.createDataFrame(data)

    df.show()
    tfs.print_schema(df)
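    # Before analysis the vector dimension is unknown; print_schema typically
    # reports something like DoubleType[?,?] for y (the exact notation may
    # vary across TensorFrames versions).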

    # Analyze first to find the dimensions of the vectors.
    df2 = tfs.analyze(df)

    tfs.print_schema(df2)
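    # After analysis, TensorFrames has inferred that y holds vectors of
    # size 2 (the same schema as printed in Example #2 below):
    # root
    #  |-- y: array (nullable = false) DoubleType[?,2]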

    # Make a copy of the 'y' column: an inexpensive operation in Spark 2.0+.
    df3 = df2.select(df2.y, df2.y.alias('z'))

    # Execute the tensor graph.
    with tf.Graph().as_default() as graph:
        y_input = tfs.block(df3, 'y', tf_name='y_input')
        z_input = tfs.block(df3, 'z', tf_name='z_input')

        # Perform elementwise sum and minimum.
        y = tf.reduce_sum(y_input, [0], name='y')
        z = tf.reduce_min(z_input, [0], name='z')

        (data_sum, data_min) = tfs.reduce_blocks([y, z], df3)

    print('Elementwise sum: %s and minimum: %s' % (data_sum, data_min))
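
For reference, a minimal entry point for running this example as a standalone script; the __main__ guard is an addition, not part of the original snippet. With rows [y, -y] for y in 0..9, the sums are 45 and -45 and the minima 0 and -9:

if __name__ == '__main__':
    simple_example_2()
    # Expected final line of output:
    # Elementwise sum: [ 45. -45.] and minimum: [ 0. -9.]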
Example #2
import tensorflow as tf
import tensorframes as tfs
from pyspark.sql import Row

# Build a DataFrame of vectors. `sqlContext` is the SQLContext that the
# PySpark shell provides; in a standalone script, create one (or a
# SparkSession) explicitly first.
data = [Row(y=[float(y), float(-y)]) for y in range(10)]
df = sqlContext.createDataFrame(data)
# Because the dataframe contains vectors, we need to analyze it first to find the
# dimensions of the vectors.
df2 = tfs.analyze(df)

# The information gathered during analysis can be printed to check the content:
tfs.print_schema(df2)
# TensorFrames has inferred that y contains vectors of size 2:
# root
#  |-- y: array (nullable = false) DoubleType[?,2]

# Let's use the analyzed dataframe to compute the sum and the elementwise minimum 
# of all the vectors:
# First, let's make a copy of the 'y' column. This will be very cheap in Spark 2.0+
df3 = df2.select(df2.y, df2.y.alias("z"))
with tf.Graph().as_default() as g:
    # The placeholders. Note the special names that end with '_input':
    y_input = tfs.block(df3, 'y', tf_name="y_input")
    z_input = tfs.block(df3, 'z', tf_name="z_input")
    y = tf.reduce_sum(y_input, [0], name='y')
    z = tf.reduce_min(z_input, [0], name='z')
    # Run the reductions over all the blocks of the dataframe:
    (data_sum, data_min) = tfs.reduce_blocks([y, z], df3)

# The final results are numpy arrays:
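# Sums and minima over the [y, -y] rows for y in 0..9:
print(data_sum)   # [ 45. -45.]
print(data_min)   # [ 0. -9.]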
Example #3
def test_schema(self):
    # `self.sql` is the SQLContext that the test harness sets up.
    data = [Row(x=float(x)) for x in range(100)]
    df = self.sql.createDataFrame(data)
    tfs.print_schema(df)
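
This test method assumes a harness that provides self.sql. A minimal sketch of such a harness follows, assuming a local SparkContext; the class name SchemaTest and the setup code are illustrative, not from the original:

import unittest

from pyspark import SparkContext
from pyspark.sql import Row, SQLContext

import tensorframes as tfs


class SchemaTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        # Local two-core context; `cls.sql` is what the test reads as `self.sql`.
        cls.sc = SparkContext('local[2]', 'tensorframes-tests')
        cls.sql = SQLContext(cls.sc)

    @classmethod
    def tearDownClass(cls):
        cls.sc.stop()

    def test_schema(self):
        data = [Row(x=float(x)) for x in range(100)]
        df = self.sql.createDataFrame(data)
        tfs.print_schema(df)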