# Imports these snippets rely on (the pypio module paths are the ones
# Apache PredictionIO ships; treat them as assumptions here).
import atexit
import sys

import pandas as pd
from py4j.protocol import Py4JError
from pyspark import SparkContext
from pyspark.sql import SparkSession
from pypio.data import PEventStore
from pypio.utils import new_string_array
from pypio.workflow import CleanupFunctions


def init():
    # Expose the Spark handles and the PredictionIO event store as globals.
    global spark
    spark = SparkSession.builder.getOrCreate()
    global sc
    sc = spark.sparkContext
    global sqlContext
    sqlContext = spark._wrapped  # internal SQLContext wrapper around the session
    global p_event_store
    p_event_store = PEventStore(spark._jsparkSession, sqlContext)

    # Run PredictionIO's cleanup hooks and stop Spark at interpreter exit.
    cleanup_functions = CleanupFunctions(sqlContext)
    atexit.register(lambda: cleanup_functions.run())
    atexit.register(lambda: sc.stop())
    print("Initialized pypio")
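
# Usage sketch: a notebook calls init() once, then relies on the globals it
# defines, e.g.
#
#   init()
#   events = p_event_store.find('MyApp')  # 'MyApp' is a placeholder app name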
try:
    # Probe for Hive support: _jvm may be unset yet (TypeError) or HiveConf
    # missing from the classpath (Py4JError); fall back to a plain session.
    SparkContext._jvm.org.apache.hadoop.hive.conf.HiveConf()
    spark = SparkSession.builder.enableHiveSupport().getOrCreate()
except (Py4JError, TypeError):
    spark = SparkSession.builder.getOrCreate()

sc = spark.sparkContext
sql = spark.sql
atexit.register(lambda: sc.stop())

sqlContext = spark._wrapped
sqlCtx = sqlContext

p_event_store = PEventStore(spark._jsparkSession, sqlContext)


def run_pio_workflow(model):
    # Hand the trained model to the template engine, then stop this
    # SparkContext so CreateWorkflow can launch its own.
    template_engine = sc._jvm.org.jpioug.template.python.Engine
    template_engine.modelRef().set(model._to_java())
    main_args = new_string_array(sys.argv, sc._gateway)
    create_workflow = sc._jvm.org.apache.predictionio.workflow.CreateWorkflow
    sc.stop()
    create_workflow.main(main_args)
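
# Typical hand-off at the end of a training notebook (sketch; `pipeline` and
# `train_df` are hypothetical, and `model` must support _to_java()):
#
#   model = pipeline.fit(train_df)
#   run_pio_workflow(model)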

### END: SETUP ###

# In[ ]:

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from pypio.data import PEventStore

p_event_store = PEventStore(spark._jsparkSession, sqlContext)

def run_pio_workflow(model, userdict, itemdict):
    # Publish the model plus the user/item id dictionaries to the engine,
    # then hand control to CreateWorkflow. `utils.toJArray` is this
    # template's own helper for building a Java String[].
    template_engine = sc._jvm.org.example.vanilla.VanillaEngine
    template_engine.modelRef().set(model)
    template_engine.userdictRef().set(userdict)
    template_engine.itemdictRef().set(itemdict)
    main_args = utils.toJArray(sc._gateway, sc._gateway.jvm.String, sys.argv)
    create_workflow = sc._jvm.org.apache.predictionio.workflow.CreateWorkflow
    sc.stop()
    create_workflow.main(main_args)

sqlContext = spark._wrapped
sqlCtx = sqlContext

app_name = 'NCF'
event_names = utils.toJArray(sc._gateway, sc._gateway.jvm.String, ['purchased-event'])

# Pull the purchase events and flatten them into a pandas frame with
# rating-matrix style column names.
p_event_store = PEventStore(spark._jsparkSession, sqlContext)
event_df = p_event_store.find(app_name, entity_type='user',
                              target_entity_type='item', event_names=event_names)
ratings = event_df.toPandas().rename(
    index=str,
    columns={'entityId': 'userid', 'targetEntityId': 'itemid', 'eventTime': 'timestamp'})

# For evaluation runs only: drop duplicate user-item interactions and
# users with fewer than two interactions.
ratings = ratings.drop_duplicates(subset=['userid', 'itemid'], keep='last')
# duplicated(keep=False) marks every row of a userid that occurs more than
# once, so the mask keeps only users with at least two interactions.
ratings = ratings[ratings.duplicated(subset=['userid'], keep=False)]
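
# Worked example of the filter above (toy data, illustration only): with
# userid = [1, 1, 2], duplicated(subset=['userid'], keep=False) yields
# [True, True, False], so user 2's single interaction is dropped while
# both of user 1's rows survive.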

# Implicit feedback: every recorded purchase counts as a positive rating.
ratings['rating'] = 1

# Strip the fixed-width string prefixes from the entity ids and cast to
# integers; the slice offsets match however the event server prefixed them.
ratings['userid'] = pd.to_numeric(ratings['userid'].str[5:]).astype(int)
ratings['itemid'] = pd.to_numeric(ratings['itemid'].str[6:]).astype(int)
ratings['timestamp'] = pd.to_numeric(ratings['timestamp'])
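
# Illustration of the slicing above, assuming ids shaped like 'user_7' and
# 'items_12' (the real prefixes are whatever the event server stored):
#   'user_7'[5:]   -> '7'
#   'items_12'[6:] -> '12'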

#TODO: Hashing trick here instead of dicts
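
# A minimal sketch of the hashing trick the TODO above refers to: map raw
# string ids straight into a fixed-size index space instead of building
# userdict/itemdict lookups. NUM_BUCKETS and the crc32 choice are
# illustrative assumptions, not part of the original template.
import zlib

NUM_BUCKETS = 2 ** 18  # assumed index space; larger means fewer collisions

def hash_id(raw_id, num_buckets=NUM_BUCKETS):
    # Deterministic, dictionary-free mapping from a raw entity id to an index.
    return zlib.crc32(raw_id.encode('utf-8')) % num_buckets

# e.g. applied to the raw string ids instead of the prefix stripping above:
#   ratings['userid'] = ratings['userid'].map(hash_id)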
sc = spark.sparkContext
sql = spark.sql


def pio_cleanup():
    # Stop Spark, then run PredictionIO's JVM-side cleanup hooks (the Py4J
    # gateway stays alive after sc.stop(), so _jvm is still reachable).
    sc.stop()
    sc._jvm.org.apache.predictionio.workflow.CleanupFunctions.run()


atexit.register(pio_cleanup)

sqlContext = spark._wrapped
sqlCtx = sqlContext

p_event_store = PEventStore(spark._jsparkSession, sqlContext)




### END: SETUP ###

# In[ ]: