def azureml_main(frame1):
    """Fit a linear model of milk production and plot the fit and residuals.

    Regresses ``Milk.Prod`` on ``Month.Count`` and ``monthNumCubed`` with
    scikit-learn's ``LinearRegression``, stores the in-sample predictions
    and residuals on the frame, and draws three diagnostic plots: actual vs.
    predicted over time, residuals over time, and a box plot of residuals
    grouped by month.

    Args:
        frame1: pandas DataFrame with at least the columns ``Month.Count``,
            ``monthNumCubed``, ``Milk.Prod``, ``Month.Number`` and ``Month``.
            The columns ``Predicted`` and ``Resid`` are added to this object
            before it is handed to ``mu.add_time_index``.

    Returns:
        The frame returned by ``mu.add_time_index``, including the
        ``Predicted`` and ``Resid`` columns.
    """
    # The backend must be selected before pyplot is imported.
    import matplotlib
    matplotlib.use('agg')

    import matplotlib.pyplot as plt
    import milkutilities as mu
    from sklearn import linear_model

    # When True, every figure is additionally written to a PNG file.
    Azure = False

    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0;
    # both return the underlying NumPy array.
    X = frame1[['Month.Count', 'monthNumCubed']].values
    Y = frame1['Milk.Prod'].values
    regr = linear_model.LinearRegression()
    regr.fit(X, Y)

    frame1['Predicted'] = regr.predict(X).tolist()
    frame1['Resid'] = frame1['Milk.Prod'] - frame1['Predicted']

    # Add a time index to the data frame.
    # NOTE(review): add_time_index is defined in milkutilities; the columns
    # it requires are not visible here -- confirm against that module.
    frame1 = mu.add_time_index(frame1)

    # Plot 1: observed vs. predicted production over time.
    fig1 = plt.figure(1, figsize=(12, 9))
    # Figure number 1 is reused on every call; clear it so a repeated call
    # does not draw on top of the axes left over from the previous run.
    fig1.clf()
    ax = fig1.gca()
    frame1[['Milk.Prod', 'Predicted']].plot(ax=ax)
    plt.xlabel("Date")
    plt.ylabel("Log CA milk production")
    plt.title("Log of milk production vs. date")
    plt.show()
    if Azure:
        fig1.savefig('scatter1.png')

    # Plot 2: residuals over time (same figure number, cleared first).
    fig2 = plt.figure(1, figsize=(12, 9))
    fig2.clf()
    ax = fig2.gca()
    frame1['Resid'].plot(ax=ax)
    plt.xlabel("Date")
    plt.ylabel("Residuals of linear model")
    plt.title("Residuals of linear model vs. date")
    plt.show()
    if Azure:
        fig2.savefig('scatter2.png')

    # Plot 3: distribution of residuals for each month of the year.
    fig3 = plt.figure(1, figsize=(12, 9))
    fig3.clf()
    ax = fig3.gca()
    frame1.boxplot(column=['Resid'], ax=ax, by=['Month.Number', 'Month'])
    plt.xlabel("Month of year")
    plt.ylabel("Residuals of linear model")
    plt.title("Residuals of linear model by Month")
    plt.show()
    if Azure:
        fig3.savefig('scatter3.png')

    return frame1
def azureml_main(frame1):
    """Fit a linear model of milk production and plot the fit and residuals.

    Regresses ``Milk.Prod`` on ``Month.Count`` and ``monthNumCubed`` with
    scikit-learn's ``LinearRegression``, stores the in-sample predictions
    and residuals on the frame, and draws three diagnostic plots: actual vs.
    predicted over time, residuals over time, and a box plot of residuals
    grouped by month.

    Args:
        frame1: pandas DataFrame with at least the columns ``Month.Count``,
            ``monthNumCubed``, ``Milk.Prod``, ``Month.Number`` and ``Month``.
            The columns ``Predicted`` and ``Resid`` are added to this object
            before it is handed to ``mu.add_time_index``.

    Returns:
        The frame returned by ``mu.add_time_index``, including the
        ``Predicted`` and ``Resid`` columns.
    """
    # The backend must be selected before pyplot is imported.
    import matplotlib
    matplotlib.use('agg')

    import matplotlib.pyplot as plt
    import milkutilities as mu
    from sklearn import linear_model

    # When True, every figure is additionally written to a PNG file.
    Azure = False

    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0;
    # both return the underlying NumPy array.
    X = frame1[['Month.Count', 'monthNumCubed']].values
    Y = frame1['Milk.Prod'].values
    regr = linear_model.LinearRegression()
    regr.fit(X, Y)

    frame1['Predicted'] = regr.predict(X).tolist()
    frame1['Resid'] = frame1['Milk.Prod'] - frame1['Predicted']

    # Add a time index to the data frame.
    # NOTE(review): add_time_index is defined in milkutilities; the columns
    # it requires are not visible here -- confirm against that module.
    frame1 = mu.add_time_index(frame1)

    # Plot 1: observed vs. predicted production over time.
    fig1 = plt.figure(1, figsize=(12, 9))
    # Figure number 1 is reused on every call; clear it so a repeated call
    # does not draw on top of the axes left over from the previous run.
    fig1.clf()
    ax = fig1.gca()
    frame1[['Milk.Prod', 'Predicted']].plot(ax=ax)
    plt.xlabel("Date")
    plt.ylabel("Log CA milk production")
    plt.title("Log of milk production vs. date")
    plt.show()
    if Azure:
        fig1.savefig('scatter1.png')

    # Plot 2: residuals over time (same figure number, cleared first).
    fig2 = plt.figure(1, figsize=(12, 9))
    fig2.clf()
    ax = fig2.gca()
    frame1['Resid'].plot(ax=ax)
    plt.xlabel("Date")
    plt.ylabel("Residuals of linear model")
    plt.title("Residuals of linear model vs. date")
    plt.show()
    if Azure:
        fig2.savefig('scatter2.png')

    # Plot 3: distribution of residuals for each month of the year.
    fig3 = plt.figure(1, figsize=(12, 9))
    fig3.clf()
    ax = fig3.gca()
    frame1.boxplot(column=['Resid'], ax=ax, by=['Month.Number', 'Month'])
    plt.xlabel("Month of year")
    plt.ylabel("Residuals of linear model")
    plt.title("Residuals of linear model by Month")
    plt.show()
    if Azure:
        fig3.savefig('scatter3.png')

    return frame1
def azureml_main(frame1):
    """Prepare the milk data: tidy month codes, add a time index and features.

    Adds three columns derived from ``Year`` and ``Month.Number``:
    ``Month.Count`` (``Month.Number + 12 * (Year - 1995)``) and its square
    and cube, ``monthNumSqred`` and ``monthNumCubed``.

    Args:
        frame1: pandas DataFrame with at least the columns ``Month``,
            ``Month.Number`` and ``Year``.  While the local ``Azure`` flag is
            False this argument is ignored and the data is read from a
            hard-coded CSV path instead.

    Returns:
        The frame returned by ``mu.add_time_index`` with the added columns.
    """
    import pandas as pd
    import numpy as np
    import milkutilities as mu
    import os.path

    # If not in the Azure environment, read the data from a csv file for
    # testing purposes.  os.path.join builds the full path in an
    # OS-independent way.
    Azure = False
    if not Azure:
        pathName = "C:/Users/Steve/Documents/AzureML/Data Sets/CA_Milk"
        fileName = "cadairydata.csv"
        filePath = os.path.join(pathName, fileName)
        frame1 = pd.read_csv(filePath)

    # Trim the month codes to 3 characters to ensure they are consistent.
    frame1['Month'] = frame1['Month'].map(lambda x: str(x)[:3])

    # Add a time index to the data frame.
    frame1 = mu.add_time_index(frame1)

    # Running count of months; 1995 is the reference year hard-coded here.
    frame1['Month.Count'] = (
        frame1['Month.Number'] + 12 * (frame1['Year'] - 1995)
    )

    # Polynomial terms of the month count.  ``.values`` replaces
    # ``as_matrix()``, which was removed in pandas 1.0.
    x = frame1['Month.Count'].values
    frame1['monthNumSqred'] = np.power(x, 2).tolist()
    frame1['monthNumCubed'] = np.power(x, 3).tolist()

    return frame1
def azureml_main(frame1):
    """Return the milk data truncated at 2013-01-01.

    Args:
        frame1: pandas DataFrame of the milk data.  While the local
            ``Azure`` flag is False this argument is ignored and the data is
            loaded from a hard-coded CSV path instead (testing aid).

    Returns:
        The slice ``[:'2013-01-01']`` of the frame produced by
        ``mu.add_time_index``.
    """
    import os.path
    import pandas as pd
    import milkutilities as mu

    # Outside the Azure environment the data comes from a local csv file;
    # os.path.join keeps the path construction OS independent.
    Azure = False
    if not Azure:
        csv_path = os.path.join(
            "C:/Users/Steve/Documents/AzureML/Data Sets/CA_Milk",
            "cadairydata.csv",
        )
        frame1 = pd.read_csv(csv_path)

    # Index the rows by time so they can be sliced with a date string.
    indexed = mu.add_time_index(frame1)

    # Drop everything after the cut-off date via label slicing.
    # NOTE(review): presumably add_time_index installs a sorted datetime
    # index, which is what makes this slice meaningful -- confirm.
    return indexed[:'2013-01-01']
def azureml_main(frame1):
    """Plot residual diagnostics for scored data and return RMSE figures.

    Computes ``Resid = Milk.Prod - Scored Labels``, draws five diagnostic
    plots (actual vs. scored, residuals over time, residual box plot by
    month, QQ-normal plot, histogram) and summarises the residuals as
    root-mean-square errors.

    Args:
        frame1: pandas DataFrame with at least the columns ``Milk.Prod``,
            ``Scored Labels``, ``Month.Number`` and ``Month``.

    Returns:
        A one-row pandas DataFrame with the columns ``rmse_Overall`` (all
        rows) and ``rmse_test`` (rows whose index label is at or after
        ``'2013-01-31'``).
    """
    # The backend must be selected before pyplot is imported.
    import matplotlib
    matplotlib.use('agg')

    import matplotlib.pyplot as plt
    import milkutilities as mu
    import statsmodels.api as sm
    import pandas as pd

    def _rmse(resid):
        """Return the root-mean-square of a Series of residuals."""
        # The original called an ``rmse`` that is neither defined nor
        # imported in this function; this local helper supplies the standard
        # definition so the call cannot fail with NameError.
        return float((resid ** 2).mean() ** 0.5)

    # When True, every figure is additionally written to a PNG file.
    Azure = False

    # Add a time index to the data frame.
    frame1 = mu.add_time_index(frame1)

    # Compute the residuals.
    frame1['Resid'] = frame1['Milk.Prod'] - frame1['Scored Labels']

    # Plot 1: observed vs. scored production over time.
    fig1 = plt.figure(1, figsize=(12, 9))
    # Figure number 1 is reused on every call; clear it so a repeated call
    # does not draw on top of the axes left over from the previous run.
    fig1.clf()
    ax = fig1.gca()
    frame1[['Milk.Prod', 'Scored Labels']].plot(ax=ax)
    plt.xlabel("Date")
    plt.ylabel("Log CA milk production")
    plt.title("Log of milk production vs. date")
    plt.show()
    if Azure:
        fig1.savefig('scatter1.png')

    # Plot 2: residuals over time.
    fig2 = plt.figure(1, figsize=(12, 9))
    fig2.clf()
    ax = fig2.gca()
    frame1['Resid'].plot(ax=ax)
    plt.xlabel("Date")
    plt.ylabel("Residuals of linear model")
    plt.title("Residuals of linear model vs. date")
    plt.show()
    if Azure:
        fig2.savefig('scatter2.png')

    # Plot 3: distribution of residuals for each month of the year.
    fig3 = plt.figure(1, figsize=(12, 9))
    fig3.clf()
    ax = fig3.gca()
    frame1.boxplot(column=['Resid'], ax=ax, by=['Month.Number', 'Month'])
    plt.xlabel("Month of year")
    plt.ylabel("Residuals of linear model")
    plt.title("Residuals of linear model by Month")
    plt.show()
    if Azure:
        fig3.savefig('scatter3.png')

    # Plot 4: QQ Normal plot of residuals.
    fig4 = plt.figure(figsize=(12, 6))
    fig4.clf()
    ax = fig4.gca()
    sm.qqplot(frame1['Resid'], ax=ax)
    ax.set_title('QQ Normal plot of residuals')
    if Azure:
        fig4.savefig('plot4.png')

    # Plot 5: histogram of the residuals.
    fig5 = plt.figure(figsize=(12, 6))
    fig5.clf()
    ax = fig5.gca()
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    # NOTE(review): hist() is not normalised here, so the y axis shows bin
    # counts even though the label below says "Density".
    ax.hist(frame1['Resid'].values, bins=40)
    ax.set_xlabel("Model residuals")
    ax.set_ylabel("Density")
    ax.set_title("Histogram of residuals")
    if Azure:
        fig5.savefig('plot5.png')

    # Compute a data frame with the rms errors for the model.  ``.loc``
    # replaces the ``.ix`` indexer, which was removed in pandas 1.0; the
    # label slice selects rows from '2013-01-31' onwards.
    out_frame = pd.DataFrame({
        'rmse_Overall': [_rmse(frame1['Resid'])],
        'rmse_test': [_rmse(frame1.loc['2013-01-31':, 'Resid'])],
    })

    return out_frame
def azureml_main(frame1):
    """Plot residual diagnostics for scored data and return RMSE figures.

    Computes ``Resid = Milk.Prod - Scored Labels``, draws five diagnostic
    plots (actual vs. scored, residuals over time, residual box plot by
    month, QQ-normal plot, histogram) and summarises the residuals as
    root-mean-square errors.

    Args:
        frame1: pandas DataFrame with at least the columns ``Milk.Prod``,
            ``Scored Labels``, ``Month.Number`` and ``Month``.

    Returns:
        A one-row pandas DataFrame with the columns ``rmse_Overall`` (all
        rows) and ``rmse_test`` (rows whose index label is at or after
        ``'2013-01-31'``).
    """
    # The backend must be selected before pyplot is imported.
    import matplotlib
    matplotlib.use('agg')

    import matplotlib.pyplot as plt
    import milkutilities as mu
    import statsmodels.api as sm
    import pandas as pd

    def _rmse(resid):
        """Return the root-mean-square of a Series of residuals."""
        # The original called an ``rmse`` that is neither defined nor
        # imported in this function; this local helper supplies the standard
        # definition so the call cannot fail with NameError.
        return float((resid ** 2).mean() ** 0.5)

    # When True, every figure is additionally written to a PNG file.
    Azure = False

    # Add a time index to the data frame.
    frame1 = mu.add_time_index(frame1)

    # Compute the residuals.
    frame1['Resid'] = frame1['Milk.Prod'] - frame1['Scored Labels']

    # Plot 1: observed vs. scored production over time.
    fig1 = plt.figure(1, figsize=(12, 9))
    # Figure number 1 is reused on every call; clear it so a repeated call
    # does not draw on top of the axes left over from the previous run.
    fig1.clf()
    ax = fig1.gca()
    frame1[['Milk.Prod', 'Scored Labels']].plot(ax=ax)
    plt.xlabel("Date")
    plt.ylabel("Log CA milk production")
    plt.title("Log of milk production vs. date")
    plt.show()
    if Azure:
        fig1.savefig('scatter1.png')

    # Plot 2: residuals over time.
    fig2 = plt.figure(1, figsize=(12, 9))
    fig2.clf()
    ax = fig2.gca()
    frame1['Resid'].plot(ax=ax)
    plt.xlabel("Date")
    plt.ylabel("Residuals of linear model")
    plt.title("Residuals of linear model vs. date")
    plt.show()
    if Azure:
        fig2.savefig('scatter2.png')

    # Plot 3: distribution of residuals for each month of the year.
    fig3 = plt.figure(1, figsize=(12, 9))
    fig3.clf()
    ax = fig3.gca()
    frame1.boxplot(column=['Resid'], ax=ax, by=['Month.Number', 'Month'])
    plt.xlabel("Month of year")
    plt.ylabel("Residuals of linear model")
    plt.title("Residuals of linear model by Month")
    plt.show()
    if Azure:
        fig3.savefig('scatter3.png')

    # Plot 4: QQ Normal plot of residuals.
    fig4 = plt.figure(figsize=(12, 6))
    fig4.clf()
    ax = fig4.gca()
    sm.qqplot(frame1['Resid'], ax=ax)
    ax.set_title('QQ Normal plot of residuals')
    if Azure:
        fig4.savefig('plot4.png')

    # Plot 5: histogram of the residuals.
    fig5 = plt.figure(figsize=(12, 6))
    fig5.clf()
    ax = fig5.gca()
    # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
    # NOTE(review): hist() is not normalised here, so the y axis shows bin
    # counts even though the label below says "Density".
    ax.hist(frame1['Resid'].values, bins=40)
    ax.set_xlabel("Model residuals")
    ax.set_ylabel("Density")
    ax.set_title("Histogram of residuals")
    if Azure:
        fig5.savefig('plot5.png')

    # Compute a data frame with the rms errors for the model.  ``.loc``
    # replaces the ``.ix`` indexer, which was removed in pandas 1.0; the
    # label slice selects rows from '2013-01-31' onwards.
    out_frame = pd.DataFrame({
        'rmse_Overall': [_rmse(frame1['Resid'])],
        'rmse_test': [_rmse(frame1.loc['2013-01-31':, 'Resid'])],
    })

    return out_frame