def init():
    """Train the ALS recommender and open the document-store collection.

    Side effects (module globals assigned):
        userRecs:   result of ``alsModel.recommendForAllUsers(10)``.
        client:     ``DocumentClient`` built from the configured host and key.
        collection: result of ``client.ReadCollection`` for
                    ``dbs/recommendation_engine/colls/user_recommendations``.

    Configuration:
        The endpoint and master key may be overridden with the
        ``COSMOSDB_HOST`` and ``COSMOSDB_MASTER_KEY`` environment variables.
        When they are unset, the original built-in values are used, so
        existing deployments keep working unchanged.
    """
    global userRecs, client, collection

    # Imports are kept function-local (as in the original) but hoisted to the
    # top so a missing dependency fails before the expensive model fit.
    import os
    import datetime  # retained from the original; unused in the visible code

    from azureml.dataprep import datasource
    from pyspark.ml.recommendation import ALS
    import pydocumentdb.documents as documents  # retained; unused here
    import pydocumentdb.document_client as document_client
    import pydocumentdb.errors as errors  # retained; unused here

    # presumably a Spark DataFrame with userId / movieId / rating columns,
    # since it is passed straight to ALS.fit - verify against the data source.
    df = datasource.load_datasource('ratings.dsource')

    # Parenthesised chain instead of backslash continuations: a trailing
    # backslash silently fuses with whatever statement follows it.
    als = (
        ALS()
        .setUserCol("userId")
        .setRatingCol("rating")
        .setItemCol("movieId")
    )
    alsModel = als.fit(df)
    userRecs = alsModel.recommendForAllUsers(10)

    # Document-store connection settings.
    # NOTE(review): a master key committed to source control should be
    # rotated and supplied only via the environment; the literal default is
    # kept solely for backward compatibility with existing deployments.
    MASTER_KEY = os.environ.get(
        'COSMOSDB_MASTER_KEY',
        'oX6tWPep8FCah8RM258s7cC3x9Kl8tWdbDxmNknXCP34ShW1Ag1ladvb5QWuBmMxuRISBO2HfrRFv3QeJYCSYg==',
    )
    HOST = os.environ.get(
        'COSMOSDB_HOST',
        'https://dcibrecommendationhack.documents.azure.com:443/',
    )
    DATABASE_ID = "recommendation_engine"
    COLLECTION_ID = "user_recommendations"

    database_link = 'dbs/' + DATABASE_ID
    collection_link = database_link + '/colls/' + COLLECTION_ID

    client = document_client.DocumentClient(HOST, {'masterKey': MASTER_KEY})
    collection = client.ReadCollection(collection_link=collection_link)
# initialize logger run_logger = get_azureml_logger() from azureml.dataprep import datasource # start Spark session spark = pyspark.sql.SparkSession.builder.appName( 'classification').getOrCreate() # print runtime versions print('****************') print('Python version: {}'.format(sys.version)) print('Spark version: {}'.format(spark.version)) print('****************') print('***Prepare Input Data to get required attributes***') inputdata = datasource.load_datasource('POLines.dsource') data = inputdata.dropna(subset=['Category']) print('***Filtering Training + Testing + Validation records***') dsinput = data[data['Category'] != ""] rawdata = dsinput[[ 'Category', 'Scenario', 'Company Code', 'Type', 'PGr', 'Created', 'Short Text', 'Storage Location', 'Vendor Material Number', 'Base Unit of Measure', 'Unit of Weight', 'Acct Assignment Cat', 'Material freight grp', 'Plant', 'Profit Center' ]] pdf = rawdata.toPandas() print('Preparing a String Column for Classification') pdf['inputstring'] = pdf[[ 'Scenario', 'Company Code', 'Type', 'PGr', 'Created', 'Short Text',
# Azure Machine Learning helpers: the data source package for loading data
# and the data collector used to log various metrics.
from azureml.dataprep import datasource
from azureml.logging import get_azureml_logger

logger = get_azureml_logger()

# Load the referenced data source as a DataFrame. Under a PySpark
# environment the call returns a Spark DataFrame; otherwise it returns a
# Pandas DataFrame.
df = datasource.load_datasource('iris.dsource')

# Placeholder: remove this line and add code that uses the DataFrame.
df.head(10)