Beispiel #1
0
 def gremlin(self, line, cell):
     """Run the cell body as a Gremlin query and display the result.

     The raw query result is converted with ``self.toPandas`` and the
     converted frame is both published to the shell as ``pdFrame`` and
     passed to ``display``.  When the magic line carries a ``var`` option,
     the *raw* result (not the converted frame) is additionally published
     under that name.

     :param line: text of the magic line; consulted only for the ``var`` option
     :param cell: cell body, sent unchanged as the Gremlin query
     """
     graph_client = self.getGraphClient()
     query_result = graph_client.run_gremlin_query(cell)
     target_name = self.getLineOption(line, "var")
     pdFrame = self.toPandas(query_result)

     if target_name is not None:
         ShellAccess[target_name] = query_result

     # NOTE(review): the local name deliberately matches the shell variable
     # published here; presumably display() looks the value up by that name,
     # so keep both in sync -- confirm before renaming.
     ShellAccess["pdFrame"] = pdFrame
     display(pdFrame)
    def run(self):
        """Relay the Node child process's stdout into the notebook.

        Reads ``self.ps.stdout`` line by line until end-of-file.  A line that
        parses as a JSON object with a truthy ``_pixiedust`` entry is treated
        as a message and dispatched on its ``type``:

        * ``display`` -- build a DataFrame from ``data``, publish it to the
          shell as ``pdf`` and display it
        * ``print``   -- print ``data`` re-serialised as JSON
        * ``store``   -- publish a DataFrame built from ``data`` under
          ``variable`` (default ``pdf``)
        * ``html``    -- render ``data`` as HTML
        * ``image``   -- render ``data`` as the ``src`` of an ``<img>`` tag

        Messages with any other ``type`` are ignored.  Every other non-blank
        line (plain text, or JSON that is not a pixiedust message) is echoed
        with surrounding whitespace stripped.
        """
        while True:
            # read line from Node's stdout
            line = self.ps.stdout.readline()

            # readline() only returns an empty value at end-of-file, i.e. the
            # stream is closed and nothing more will ever arrive.  Stop here
            # instead of spinning forever on empty reads.
            if not line:
                break

            # see if it parses as JSON
            obj = None
            try:
                obj = json.loads(line)
            except Exception:
                # not JSON: handled below as ordinary output
                pass

            # Only a JSON *object* carrying the marker is a pixiedust message.
            # Other JSON values (numbers, lists, unmarked objects, ...) are
            # ordinary program output; indexing them would raise and print a
            # misleading error, so echo them like any other line.
            if not (isinstance(obj, dict) and obj.get('_pixiedust')):
                text = line.strip()
                if len(text) > 0:
                    print(text)
                continue

            try:
                msg_type = obj['type']
                if msg_type == 'display':
                    # The local name matches the shell variable published on
                    # the next line; keep them in sync (NOTE(review): display()
                    # presumably resolves the value by this name -- confirm).
                    pdf = pandas.DataFrame(obj['data'])
                    ShellAccess.pdf = pdf
                    display(pdf)
                elif msg_type == 'print':
                    print(json.dumps(obj['data']))
                elif msg_type == 'store':
                    variable = obj.get('variable', 'pdf')
                    ShellAccess[variable] = pandas.DataFrame(obj['data'])
                elif msg_type == 'html':
                    IPython.display.display(
                        IPython.display.HTML(obj['data']))
                elif msg_type == 'image':
                    IPython.display.display(
                        IPython.display.HTML('<img src="{0}" />'.format(
                            obj['data'])))

            except Exception as e:
                # A malformed message must not kill the reader loop: show the
                # offending line and the error, then carry on.
                print(line)
                print(e)
    def run(self):
        """Relay the Node child process's stdout into the notebook.

        Reads ``self.ps.stdout`` line by line until end-of-file.  A line that
        parses as a JSON object with a truthy ``_pixiedust`` entry is treated
        as a message and dispatched on its ``type``:

        * ``display`` -- build a DataFrame from ``data``, publish it to the
          shell as ``pdf`` and display it
        * ``print``   -- print ``data`` re-serialised as JSON
        * ``store``   -- publish a DataFrame built from ``data`` under
          ``variable`` (default ``pdf``)

        Messages with any other ``type`` are ignored.  Every other non-blank
        line (plain text, or JSON that is not a pixiedust message) is echoed
        with surrounding whitespace stripped.
        """
        while True:
            # read line from Node's stdout
            line = self.ps.stdout.readline()

            # An empty read means end-of-file: the stream is closed, so leave
            # the loop rather than busy-spinning on empty reads forever.
            if not line:
                break

            # see if it parses as JSON
            obj = None
            try:
                obj = json.loads(line)
            except Exception:
                # not JSON: handled below as ordinary output
                pass

            # Anything that is not a JSON object carrying the pixiedust marker
            # is ordinary output.  Echo it instead of indexing into it, which
            # would raise and print a misleading error message.
            is_message = isinstance(obj, dict) and obj.get('_pixiedust')
            if not is_message:
                text = line.strip()
                if len(text) > 0:
                    print(text)
                continue

            try:
                msg_type = obj['type']
                if msg_type == 'display':
                    # The local name matches the shell variable published on
                    # the next line; keep them in sync (NOTE(review): display()
                    # presumably resolves the value by this name -- confirm).
                    pdf = pandas.DataFrame(obj['data'])
                    ShellAccess.pdf = pdf
                    display(pdf)
                elif msg_type == 'print':
                    print(json.dumps(obj['data']))
                elif msg_type == 'store':
                    variable = obj.get('variable', 'pdf')
                    ShellAccess[variable] = pandas.DataFrame(obj['data'])

            except Exception as e:
                # A malformed message must not kill the reader loop: show the
                # offending line and the error, then carry on.
                print(line)
                print(e)
Beispiel #4
0
    def sql(self, line, cell=None):
        """Run Db2 SQL from a line magic (``%sql``) or cell magic (``%%sql``).

        Option flags are always taken from ``line``.  For a line magic the
        rest of ``line`` is the single statement to run; for a cell magic the
        cell is stripped of every line containing ``--``, joined into one
        line and split on the active delimiter into individual statements.

        Flags recognised in ``line`` (checked in this order):

        * ``-j``  print the first column of each row as formatted JSON
        * ``-sampledata``  create the sample tables and return
        * ``-s``  force the statement to be run as a query
        * ``-r``  return the rows as a list of lists
        * ``-n``  force the statement to be run as a non-query command
        * ``-q``  quiet mode, passed on to the error reporter
        * ``-a``  reset the pandas ``max_rows`` display option
        * ``-d``  use ``@`` instead of ``;`` as the statement delimiter
        * ``-t``  time the statement with ``sqlTimer``
        * ``-pb`` / ``-pp`` / ``-pl``  bar / pie / line plot of the result
        * ``-i``  publish the result to the shell as ``pdf`` and display it

        NOTE: flags are found by plain substring search over the whole line,
        so SQL text on a ``%sql`` line that happens to contain e.g. ``-n``
        is also treated as a flag.

        :param line: text following the magic name (flags, and for a line
            magic the SQL itself)
        :param cell: cell body for a cell magic, ``None`` for a line magic
        :return: the iteration count from ``sqlTimer`` with ``-t``; a list of
            row lists with ``-r``; a pandas DataFrame for the first statement
            run as a query; otherwise ``None``
        """

        # Before we even get started, check to see if you have connected yet. Without a connection we
        # can't do anything. You may have a connection request in the code, so if that is true, we run
        # that, otherwise we connect immediately.

        global settings
        global hdbc, hstmt, connected

        select = ["SELECT", "WITH", "VALUES"]
        noBlock = 0
        sqlBlock = 1
        db2Block = 2

        # If you use %sql (line) we just run the SQL. If you use %%sql the entire cell is run.

        flag_delim = ";"
        flag_sqlType = noBlock
        flag_quiet = False
        flag_json = False
        flag_timer = False
        flag_plot = 0
        flag_cell = False
        flag_output = False
        flag_resultset = False

        # The parameters must be in the line, not in the cell i.e. %sql -c

        Parms = line.strip()

        # Nothing on the line and nothing (or only whitespace) in the cell: show help
        if len(Parms) == 0:
            if cell is None or len(cell.strip()) == 0:
                sqlhelp()
                return

        # Check if you just want help

        if Parms == "?":
            sqlhelp()
            return

        if Parms.upper() == "? CONNECT":
            connected_help()
            return

        # If you issue a CONNECT statement in %sql then we run this first before auto-connecting
        if findKeyword(Parms,"CONNECT") == True:
            parseConnect(Parms)
            return

        # We need to check to see if we are connected before running any SQL
        if connected == False:
            db2_doConnect()
            if connected == False: return

        # Default result set size
        if settings["maxrows"] == -1:
            pandas.reset_option('max_rows')
        else:
            pandas.options.display.max_rows = settings["maxrows"]

        # The flags below are consumed in a fixed order because each one is
        # blanked out of Parms once seen; do not reorder them.

        # Display rows as JSON structure
        if Parms.find("-j") >= 0:
            flag_json = True
            Parms = Parms.replace("-j"," ")

        # Load sample tables for scripts
        if Parms.find('-sampledata') >= 0:
            db2_create_sample()
            return

        # Execute the SQL so that it behaves like a SELECT statement
        if Parms.find("-s") >= 0:
            flag_sqlType = sqlBlock
            Parms = Parms.replace("-s"," ")

        # Execute the SQL but return the results in an array (basically a two-dimensional array)
        if Parms.find("-r") >= 0:
            flag_resultset = True
            Parms = Parms.replace("-r", " ")

        # Execute the SQL so that it behaves like an INSERT, DELETE, UPDATE or no result set
        if Parms.find("-n") >= 0:
            flag_sqlType = db2Block
            Parms = Parms.replace("-n"," ")

        # Quiet execution (no errors or completed messages)
        if Parms.find("-q") >= 0:
            flag_quiet = True
            Parms = Parms.replace("-q"," ")

        # Retrieve all rows (do not use the default limit)
        if Parms.find("-a") >= 0:
            pandas.reset_option('max_rows')
            Parms = Parms.replace("-a"," ")

        # Set the delimiter to @ instead of a semi-colon for procedures, triggers, and functions
        if Parms.find("-d") >= 0:
            flag_delim = "@"
            Parms = Parms.replace("-d"," ")

        # Timer function (not that useful, but worth a try)
        if Parms.find("-t") >= 0:
            flag_timer = True
            Parms = Parms.replace("-t"," ")

        # Plot functions -pb = bar, -pp = pie, -pl = line
        if Parms.find("-pb") >= 0:
            flag_plot = 1
            Parms = Parms.replace("-pb"," ")

        if Parms.find("-pp") >= 0:
            flag_plot = 2
            Parms = Parms.replace("-pp"," ")

        if Parms.find("-pl") >= 0:
            flag_plot = 3
            Parms = Parms.replace("-pl"," ")

        # Interactive display of the result (-i)
        if Parms.find("-i") >= 0:
            flag_plot = 4
            Parms = Parms.replace("-i"," ")

        remainder = Parms.strip()

        # Split the line according to your delimiter

        if cell is None:
            sqlLines = [remainder]
            flag_cell = False
        else:
            # Drops every line that contains "--" (the whole line, not just the comment part)
            cell = re.sub('.*?--.*$',"",cell,flags=re.M)
            remainder = cell.replace("\n"," ")
            sqlLines = remainder.split(flag_delim)
            flag_cell = True

        # For each line figure out if you run it as a command (db2) or select (sql)

        for sql in sqlLines:

            # Split the line so we know what the first keyword is. We only look at the first one. There may
            # be SQL that returns output that we may not know about

            keywords = sql.split()
            if len(keywords) == 0: continue

            sqlcmd = keywords[0].upper()

            if flag_timer:

                # Timing returns after the first non-empty statement
                count = sqlTimer(flag_sqlType, sql)

                if not flag_quiet and count != -1:
                    print("Total iterations in %s second(s): %s" % (runtime,count))

                return count

            elif flag_plot != 0:

                try:
                    df = pandas.read_sql(sql,hstmt)
                except Exception:
                    db2_error(False)
                    return

                if flag_plot == 4:

                    # Bind the frame to a local called `pdf` before displaying
                    # it: the name was previously referenced without ever
                    # being assigned in this function.  It matches the shell
                    # variable published on the next line.
                    pdf = df
                    ShellAccess.pdf = pdf
                    display(pdf)

                    return

                plt.style.use('ggplot')
                plt.figure()
                col_count = len(df.columns)

                if flag_plot == 1:

                    # Bar Chart: first column on x, second on y when available
                    if col_count >= 2:
                        xlabel = df.columns.values[0]
                        ylabel = df.columns.values[1]
                        _ = df.plot(kind='bar',x=xlabel,y=ylabel)
                    else:
                        _ = df.plot(kind='bar')

                elif flag_plot == 2:

                    # Pie: first column supplies the labels, second the values
                    if col_count >= 2:
                        xlabel = df.columns.values[0]
                        xname = df[xlabel].tolist()
                        yname = df.columns.values[1]
                        _ = df.plot(kind='pie',y=yname,labels=xname)
                    else:
                        yname = df.columns.values[0]
                        _ = df.plot(kind='pie',y=yname)

                elif flag_plot == 3:

                    # Line Chart: first column on x, second on y when available
                    if col_count >= 2:
                        xlabel = df.columns.values[0]
                        ylabel = df.columns.values[1]
                        _ = df.plot(kind='line',x=xlabel,y=ylabel)
                    else:
                        _ = df.plot(kind='line')

                else:
                    return

                plt.show()
                return

            elif (flag_sqlType == sqlBlock) or (sqlcmd in select and flag_sqlType != db2Block):

                if flag_json:
                    try:
                        stmt = ibm_db.exec_immediate(hdbc,sql)
                        row_count = 0
                        while ibm_db.fetch_row(stmt):
                            row_count = row_count + 1
                            jsonVal = ibm_db.result(stmt,0)
                            formatted_JSON = json.dumps(json.loads(jsonVal), indent=4, separators=(',', ': '))

                            # Print JSON Structure

                            if row_count > 1: print()
                            print("Row: %d" % row_count)
                            print(formatted_JSON)
                            flag_output = True

                    except Exception:
                        db2_error(flag_quiet)

                elif flag_resultset:
                    resultSet = []
                    try:
                        stmt = ibm_db.exec_immediate(hdbc,sql)
                        result = ibm_db.fetch_tuple(stmt)
                        while result:
                            # Copy each fetched tuple into a plain list
                            resultSet.append(list(result))
                            result = ibm_db.fetch_tuple(stmt)

                        return resultSet

                    except Exception:
                        db2_error(False)

                else:
                    try:
                        # The frame is handed back to the caller rather than
                        # displayed here; this ends processing at the first
                        # statement that is run as a query.
                        return pandas.read_sql(sql, hstmt)

                    except Exception:
                        db2_error(flag_quiet)

            else:

                try:
                    ibm_db.exec_immediate(hdbc,sql)
                    if not flag_cell and not flag_quiet:
                        print("Command completed.")

                except Exception:
                    db2_error(flag_quiet)

        if flag_cell and not flag_output:
            print("Command completed.")
Beispiel #5
0
# Example script: build the GraphFrames "friends" sample graph and hand it to
# the pixiedust display() call.
#
# The commented-out lines below are kept from the original example.
# NOTE(review): they appear to install the graphframes package through the
# pixiedust PackageManager -- confirm whether that step is still required.
# from pixiedust.packageManager import PackageManager

# pkg = PackageManager()
# pkg.installPackage("graphframes:graphframes:0")
# pkg.printAllPackages()
from pyspark.sql import SQLContext, Row
from pyspark import SparkContext
# Create the Spark context and the SQL context that is passed to the Graphs example below
sc = SparkContext()
sqlContext = SQLContext(sc)

# Import the display module (star import: brings display() into scope)
from pixiedust.display import *
# Import the Graphs example
from graphframes.examples import Graphs
# Create the friends example graph
g = Graphs(sqlContext).friends()
# Use the pixiedust display
display(g)