def init():
    """Train the ALS recommender and open the recommendations collection.

    Steps performed:

    1. Load the ``ratings.dsource`` data source.
    2. Fit an ALS model using the ``userId``, ``movieId`` and ``rating``
       columns and store the top-10 recommendations for every user in the
       module-level ``userRecs``.
    3. Create a DocumentDB client and read the ``user_recommendations``
       collection of the ``recommendation_engine`` database, publishing
       them as the module-level ``client`` and ``collection``.

    The endpoint and master key may be overridden with the
    ``COSMOSDB_HOST`` and ``COSMOSDB_MASTER_KEY`` environment variables;
    when those are not set the built-in values below are used.
    """
    # All imports are done up front so that a missing package is reported
    # before the (expensive) model training starts.
    import os

    from azureml.dataprep import datasource
    from pyspark.ml.recommendation import ALS
    import pydocumentdb.document_client as document_client

    global userRecs, client, collection

    df = datasource.load_datasource('ratings.dsource')

    # Parenthesised chain instead of backslash continuations: a trailing
    # backslash before a blank line is easy to break by accident.
    als = (
        ALS()
        .setUserCol("userId")
        .setRatingCol("rating")
        .setItemCol("movieId")
    )

    alsModel = als.fit(df)
    userRecs = alsModel.recommendForAllUsers(10)

    # Connection settings for the document store holding the recommendations.
    # NOTE(review): the fallback master key is committed in source control;
    # it should be rotated and supplied through the environment only.
    MASTER_KEY = os.environ.get(
        'COSMOSDB_MASTER_KEY',
        'oX6tWPep8FCah8RM258s7cC3x9Kl8tWdbDxmNknXCP34ShW1Ag1ladvb5QWuBmMxuRISBO2HfrRFv3QeJYCSYg==')
    HOST = os.environ.get(
        'COSMOSDB_HOST',
        'https://dcibrecommendationhack.documents.azure.com:443/')
    DATABASE_ID = "recommendation_engine"
    COLLECTION_ID = "user_recommendations"
    database_link = 'dbs/' + DATABASE_ID
    collection_link = database_link + '/colls/' + COLLECTION_ID

    client = document_client.DocumentClient(HOST, {'masterKey': MASTER_KEY})
    collection = client.ReadCollection(collection_link=collection_link)
# Example #2
# 0
# Imports needed by this section: `sys` for the interpreter version,
# `pyspark.sql` for the Spark session, and the Azure ML helpers for
# logging and data source loading.
import sys

import pyspark.sql
from azureml.dataprep import datasource
from azureml.logging import get_azureml_logger

# initialize logger
run_logger = get_azureml_logger()

# start Spark session (reuses an existing session if one is already active)
spark = pyspark.sql.SparkSession.builder.appName(
    'classification').getOrCreate()

# print runtime versions
print('****************')
print('Python version: {}'.format(sys.version))
print('Spark version: {}'.format(spark.version))
print('****************')

print('***Prepare Input Data to get required attributes***')
inputdata = datasource.load_datasource('POLines.dsource')
# Drop rows where 'Category' is missing.
data = inputdata.dropna(subset=['Category'])

print('***Filtering Training + Testing + Validation records***')
# Keep only rows with a non-empty 'Category' value.
dsinput = data[data['Category'] != ""]
# Restrict the data to the columns used further down.
rawdata = dsinput[[
    'Category', 'Scenario', 'Company Code', 'Type', 'PGr', 'Created',
    'Short Text', 'Storage Location', 'Vendor Material Number',
    'Base Unit of Measure', 'Unit of Weight', 'Acct Assignment Cat',
    'Material freight grp', 'Plant', 'Profit Center'
]]
# Convert to a pandas DataFrame for the string preparation that follows.
pdf = rawdata.toPandas()

print('Preparing a String Column for Classification')
pdf['inputstring'] = pdf[[
    'Scenario', 'Company Code', 'Type', 'PGr', 'Created', 'Short Text',
# Example #3
# 0
# Azure Machine Learning helpers: `datasource` loads registered data
# sources, `get_azureml_logger` provides the run logger for metrics.
from azureml.dataprep import datasource
from azureml.logging import get_azureml_logger

# Run logger; available for recording metrics from this script.
logger = get_azureml_logger()

# Load the data source referenced by 'iris.dsource' into a DataFrame.
# Under PySpark the result is a Spark DataFrame; otherwise it is a
# Pandas DataFrame.
df = datasource.load_datasource('iris.dsource')

# Placeholder: fetches the first 10 rows and discards them. Replace this
# line with code that actually uses the DataFrame.
df.head(10)