def bar(self, var: str, title: str = '', label: str = '') -> object:
    """
    Generate a vertical bar chart of one column with PROC SGPLOT.

    This method requires a character column (use the contents method to
    see column types).

    :param var: the CHAR variable (column) you want to plot
    :param title: an optional title for the chart
    :param label: LegendLABEL= value for sgplot
    :return: None when the code is only printed (``self.sas.nosub``) or when
             the chart is displayed; the result of the submit (or of
             ``_is_valid`` when that is truthy) when ``self.sas.batch`` is set
    """
    code = "proc sgplot data=" + self.libref + '.' + self.table + self._dsopts()
    code += ";\n\tvbar " + var
    if label:
        code += " / LegendLABEL='" + label + "'"
    code += ";\n"
    if title:
        code += '\ttitle "' + title + '";\n'
    code += 'run;\ntitle;'

    if self.sas.nosub:
        print(code)
        return

    # A truthy _is_valid() result is used in place of the submit output.
    ll = self._is_valid()
    if not ll:
        html = self.HTML
        self.HTML = 1
        try:
            ll = self.sas._io.submit(code)
        finally:
            # Restore the caller's HTML setting even if the submit fails.
            self.HTML = html

    if not self.sas.batch:
        DISPLAY(HTML(ll['LST']))
    else:
        return ll
def columnInfo(self):
    """
    Show the Variables table of PROC CONTENTS for this data set.

    When ``self.results`` is 'PANDAS' (case-insensitive) the Variables ODS
    table is written to work._variables and fetched through ``_returnPD``,
    with trailing whitespace stripped from its 'Type' column. Otherwise the
    listing is rendered as HTML or text depending on ``self.HTML``; in batch
    mode the result is returned instead of displayed.
    """
    code = ''.join(("proc contents data=", self.libref, '.', self.table, ' ',
                    self._dsopts(), ";ods select Variables;run;"))

    if self.sas.nosub:
        print(code)
        return

    if self.results.upper() == 'PANDAS':
        code = "proc contents data=%s.%s %s ;ods output Variables=work._variables ;run;" % (
            self.libref, self.table, self._dsopts())
        frame = self._returnPD(code, '_variables')
        frame['Type'] = frame['Type'].str.rstrip()
        return frame

    # A truthy _is_valid() result is used in place of the submit output.
    ll = self._is_valid()
    as_html = self.HTML
    if not ll:
        if as_html:
            ll = self.sas._io.submit(code)
        else:
            ll = self.sas._io.submit(code, "text")

    if self.sas.batch:
        return ll
    if as_html:
        DISPLAY(HTML(ll['LST']))
    else:
        print(ll['LST'])
def hist(self, var: str, title: str = '', label: str = '') -> object:
    """
    Generate a histogram with a density overlay using PROC SGPLOT.

    This method requires a numeric column (use the contents method to see
    column types).

    :param var: the NUMERIC variable (column) you want to plot
    :param title: an optional Title for the chart
    :param label: LegendLABEL= value for sgplot
    :return: None when the code is only printed (``self.sas.nosub``) or when
             the chart is displayed; the result of the submit (or of
             ``_is_valid`` when that is truthy) when ``self.sas.batch`` is set
    """
    code = "proc sgplot data=" + self.libref + '.' + self.table + self._dsopts()
    code += ";\n\thistogram " + var + " / scale=count"
    if label:
        code += " LegendLABEL='" + label + "'"
    code += ";\n"
    if title:
        code += '\ttitle "' + title + '";\n'
    code += "\tdensity " + var + ';\nrun;\n' + 'title;'

    if self.sas.nosub:
        print(code)
        return

    # A truthy _is_valid() result is used in place of the submit output.
    ll = self._is_valid()
    if not ll:
        html = self.HTML
        self.HTML = 1
        try:
            ll = self.sas._io.submit(code)
        finally:
            # Restore the caller's HTML setting even if the submit fails.
            self.HTML = html

    if not self.sas.batch:
        DISPLAY(HTML(ll['LST']))
    else:
        return ll
def top(self, var: str, n: int = 10, order: str = 'freq', title: str = '') -> object:
    """
    Return the most commonly occurring items (levels) of a column.

    Runs PROC FREQ into the table tmpFreqOut and then shows its first ``n``
    rows. When ``self.results`` is 'PANDAS' (case-insensitive) the truncated
    table is fetched through ``_returnPD`` instead of being printed.

    :param var: the CHAR variable (column) you want to count
    :param n: the top N to be displayed (defaults to 10)
    :param order: value for the PROC FREQ order= option (defaults to 'freq')
    :param title: an optional Title for the chart
    :return: Data Table
    """
    tables_stmt = "\n\ttables %s / out=tmpFreqOut;"

    pieces = ["proc freq data=%s.%s %s order=%s noprint;" % (
        self.libref, self.table, self._dsopts(), order)]
    pieces.append(tables_stmt % var)
    pieces.append("\nrun;")
    if len(title):
        pieces.append('\ttitle "' + title + '";\n')
    pieces.append("proc print data=tmpFreqOut(obs=%s); \nrun;" % n)
    pieces.append('title;')
    code = ''.join(pieces)

    if self.sas.nosub:
        print(code)
        return

    # A truthy _is_valid() result is used in place of the submit output.
    ll = self._is_valid()

    if self.results.upper() == 'PANDAS':
        code = "proc freq data=%s.%s%s order=%s noprint;" % (
            self.libref, self.table, self._dsopts(), order)
        code += tables_stmt % var
        code += "\nrun;"
        code += "\ndata tmpFreqOut; set tmpFreqOut(obs=%s); run;" % n
        return self._returnPD(code, 'tmpFreqOut')

    as_html = self.HTML
    if not ll:
        if as_html:
            ll = self.sas._io.submit(code)
        else:
            ll = self.sas._io.submit(code, "text")

    if self.sas.batch:
        return ll
    if as_html:
        DISPLAY(HTML(ll['LST']))
    else:
        print(ll['LST'])
def heatmap(self, x: str, y: str, options: str = '', title: str = '', label: str = '') -> object:
    """
    Generate a heat map of two columns with PROC SGPLOT.

    Documentation link:
    http://support.sas.com/documentation/cdl/en/grstatproc/67909/HTML/default/viewer.htm#n0w12m4cn1j5c6n12ak64u1rys4w.htm

    :param x: x variable
    :param y: y variable
    :param options: display options (string), placed after the "/" of the
                    heatmap statement
    :param title: graph title
    :param label: LegendLABEL= value, added to the heatmap statement options
    :return: None when the code is only printed (``self.sas.nosub``) or when
             the chart is displayed; the result of the submit (or of
             ``_is_valid`` when that is truthy) when ``self.sas.batch`` is set
    """
    # Statement options must all sit between the "/" and the terminating ";".
    # NOTE(review): confirm the HEATMAP statement accepts LegendLABEL=.
    stmt_options = []
    if options:
        stmt_options.append(options)
    if label:
        stmt_options.append("LegendLABEL='" + label + "'")

    code = "proc sgplot data=%s.%s %s;" % (self.libref, self.table, self._dsopts())
    code += "\n\theatmap x=%s y=%s" % (x, y)
    if stmt_options:
        code += " / " + " ".join(stmt_options)
    code += ";\n"
    if title:
        code += "\ttitle '%s';\n" % title
    code += "run;\ntitle;"

    if self.sas.nosub:
        print(code)
        return

    # A truthy _is_valid() result is used in place of the submit output.
    ll = self._is_valid()
    if not ll:
        html = self.HTML
        self.HTML = 1
        try:
            ll = self.sas._io.submit(code)
        finally:
            # Restore the caller's HTML setting even if the submit fails.
            self.HTML = html

    if not self.sas.batch:
        DISPLAY(HTML(ll['LST']))
    else:
        return ll
def score(self, file: str = '', code: str = '', out: 'SASdata' = None) -> 'SASdata':
    """
    Run SAS score code against this table in a DATA step.

    This method is meant to update a SAS Data object with a model score file.

    :param file: a file reference to the SAS score code; when non-empty it
                 is %included and ``code`` is ignored
    :param code: a string of the valid SAS score code
    :param out: object whose ``libref`` and ``table`` name the output data
                set. Defaults to update in place
    :return: None when the code is only printed (``self.sas.nosub``) or when
             the output is displayed; the result of the submit (or of
             ``_is_valid`` when that is truthy) when ``self.sas.batch`` is set
    """
    if out is not None:
        outTable = out.table
        outLibref = out.libref
    else:
        outTable = self.table
        outLibref = self.libref

    # Keep the caller's score code before the name is reused for the program.
    codestr = code
    code = "data %s.%s%s;" % (outLibref, outTable, self._dsopts())
    code += "set %s.%s%s;" % (self.libref, self.table, self._dsopts())
    if file:
        code += '%%include "%s";' % file
    else:
        code += "%s;" % codestr
    code += "run;"

    if self.sas.nosub:
        print(code)
        return None

    # A truthy _is_valid() result is used in place of the submit output.
    ll = self._is_valid()
    if not ll:
        html = self.HTML
        self.HTML = 1
        try:
            ll = self.sas._io.submit(code)
        finally:
            # Restore the caller's HTML setting even if the submit fails.
            self.HTML = html

    if not self.sas.batch:
        DISPLAY(HTML(ll['LST']))
    else:
        return ll
def contents(self):
    """
    Display metadata about the table: size, number of rows, columns and
    their data type, as reported by PROC CONTENTS.

    When ``self.results`` is 'PANDAS' (case-insensitive) the Attributes,
    EngineHost, Variables and Sortedby ODS tables are written to WORK and
    fetched through ``_returnPD``. Otherwise the listing is rendered as HTML
    or text depending on ``self.HTML``.

    :return: output
    """
    code = ''.join(("proc contents data=", self.libref, '.', self.table,
                    self._dsopts(), ";run;"))

    if self.sas.nosub:
        print(code)
        return

    # A truthy _is_valid() result is used in place of the submit output.
    ll = self._is_valid()

    if self.results.upper() == 'PANDAS':
        statements = (
            "proc contents data=%s.%s %s ;" % (self.libref, self.table, self._dsopts()),
            "ods output Attributes=work._attributes;",
            "ods output EngineHost=work._EngineHost;",
            "ods output Variables=work._Variables;",
            "ods output Sortedby=work._Sortedby;",
            "run;",
        )
        return self._returnPD(
            ''.join(statements),
            ['_attributes', '_EngineHost', '_Variables', '_Sortedby'])

    as_html = self.HTML
    if not ll:
        if as_html:
            ll = self.sas._io.submit(code)
        else:
            ll = self.sas._io.submit(code, "text")

    if self.sas.batch:
        return ll
    if as_html:
        DISPLAY(HTML(ll['LST']))
    else:
        print(ll['LST'])
def scatter(self, x: str, y: list, title: str = '') -> object:
    """
    Plot a scatter of x,y coordinates with PROC SGPLOT.

    One scatter statement is emitted per y column, so a list (or tuple) of
    columns overlays several series on the same chart.

    :param x: the x axis variable; generally a time or continuous variable.
    :param y: the y axis variable(s), you can specify a single column or a
              list/tuple of columns
    :param title: an optional Title for the chart
    :return: None when the code is only printed (``self.sas.nosub``) or when
             the chart is displayed; the result of the submit (or of
             ``_is_valid`` when that is truthy) when ``self.sas.batch`` is set
    """
    # A single column name is treated as a one-element list.
    y_columns = list(y) if isinstance(y, (list, tuple)) else [y]

    code = "proc sgplot data=" + self.libref + '.' + self.table + self._dsopts() + ";\n"
    if title:
        code += '\ttitle "' + title + '";\n'
    for column in y_columns:
        code += "\tscatter x=" + x + " y=" + column + ";\n"
    code += 'run;\n' + 'title;'

    if self.sas.nosub:
        print(code)
        return

    # A truthy _is_valid() result is used in place of the submit output.
    ll = self._is_valid()
    if not ll:
        html = self.HTML
        self.HTML = 1
        try:
            ll = self.sas._io.submit(code)
        finally:
            # Restore the caller's HTML setting even if the submit fails.
            self.HTML = html

    if not self.sas.batch:
        DISPLAY(HTML(ll['LST']))
    else:
        return ll
def means(self):
    """
    Display descriptive (summary) statistics for the table with PROC MEANS.
    This is an alias for 'describe'.

    The data set options string is split at ';\\n\\tformat': the part before
    it goes on the PROC statement, and the separator plus the remainder is
    emitted as its own statement text after it. When ``self.results`` is
    'PANDAS' (case-insensitive) the Summary ODS table is written to
    work._summary and fetched through ``_returnPD``.

    :return:
    """
    stats = " stackodsoutput n nmiss median mean std min p25 p50 p75 max;"
    ds_part, separator, remainder = self._dsopts().partition(';\n\tformat')
    trailing = separator + remainder

    code = "proc means data=" + self.libref + '.' + self.table + ds_part + stats
    code += trailing + "run;"

    if self.sas.nosub:
        print(code)
        return

    # A truthy _is_valid() result is used in place of the submit output.
    ll = self._is_valid()

    if self.results.upper() == 'PANDAS':
        code = "proc means data=%s.%s %s stackodsoutput n nmiss median mean std min p25 p50 p75 max; %s ods output Summary=work._summary; run;" % (
            self.libref, self.table, ds_part, trailing)
        return self._returnPD(code, '_summary')

    as_html = self.HTML
    if not ll:
        if as_html:
            ll = self.sas._io.submit(code)
        else:
            ll = self.sas._io.submit(code, "text")

    if self.sas.batch:
        return ll
    if as_html:
        DISPLAY(HTML(ll['LST']))
    else:
        print(ll['LST'])
def head(self, obs=5):
    """
    Display the first n rows of a table.

    A copy of ``self.dsopts`` with 'obs' set to ``obs`` is rendered through
    ``self.sas._dsopts``; ``self.dsopts`` itself is left untouched. When
    ``self.results`` is 'PANDAS' (case-insensitive) the rows are copied to
    the table _head and fetched through ``_returnPD``.

    :param obs: the number of rows of the table that you want to display.
                The default is 5
    :return:
    """
    topts = dict(self.dsopts, obs=obs)
    code = ''.join(("proc print data=", self.libref, '.', self.table,
                    self.sas._dsopts(topts), ";run;"))

    if self.sas.nosub:
        print(code)
        return

    if self.results.upper() == 'PANDAS':
        code = "data _head ; set %s.%s %s; run;" % (
            self.libref, self.table, self.sas._dsopts(topts))
        return self._returnPD(code, '_head')

    # A truthy _is_valid() result is used in place of the submit output.
    ll = self._is_valid()
    as_html = self.HTML
    if not ll:
        if as_html:
            ll = self.sas._io.submit(code)
        else:
            ll = self.sas._io.submit(code, "text")

    if self.sas.batch:
        return ll
    if as_html:
        DISPLAY(HTML(ll['LST']))
    else:
        print(ll['LST'])
def execute_table(self, _output_type, **kwargs: dict) -> 'SASresults':
    """
    Build and execute a PROC TABULATE statement.

    You must specify an output type to use this method, of 'HTML', 'text',
    or 'Pandas'. There are three convenience functions for generating
    specific output; see:
        .text_table()
        .table()
        .to_dataframe()

    :param _output_type: style of output to use; any other value submits
                         nothing and returns None
    :param left: the query for the left side of the table (required: its
                 ``_gather`` method is called unconditionally)
    :param top: the query for the top of the table
    :param kwargs: further statements for the procedure; they are checked
                   through ``SASProcCommons._stmt_check`` with 'where' as
                   the only additional allowed option
    :return: None when the code is only printed (``self.sas.nosub``), for
             'text' output, and for displayed 'HTML' output; the result of
             the submit (or of ``_is_valid`` when that is truthy) for 'HTML'
             in batch mode; the result of ``to_nested_dataframe`` for
             'Pandas'
    :raises ValueError: for displayed 'HTML' output when
                        ``_checkLogForError`` reports a failure in the log
    """
    left = kwargs.pop('left', None)
    top = kwargs.pop('top', None)

    # Collect the class and analysis variables referenced by both queries.
    sets = dict(classes=set(), vars=set())
    left._gather(sets)
    if top:
        top._gather(sets)

    table = '%s, %s' % (str(left), str(top)) if top else str(left)

    proc_kwargs = dict(
        cls=' '.join(sets['classes']),
        var=' '.join(sets['vars']),
        table=table,
    )

    # permit additional valid options if passed; for now, just 'where'
    proc_kwargs.update(kwargs)

    # we can't easily use the SASProcCommons approach for submitting,
    # since this is merely an output / display proc for us;
    # but we can at least use it to check valid options in the canonical saspy way
    required_options = {'cls', 'var', 'table'}
    allowed_options = {'cls', 'var', 'table', 'where'}
    verifiedKwargs = SASProcCommons._stmt_check(self, required_options, allowed_options, proc_kwargs)

    if _output_type == 'Pandas':
        # for pandas, use the out= directive
        code = "proc tabulate data=%s.%s %s out=temptab;\n" % (
            self.data.libref, self.data.table, self.data._dsopts())
    else:
        code = "proc tabulate data=%s.%s %s;\n" % (
            self.data.libref, self.data.table, self.data._dsopts())

    # build the code; 'cls' is emitted as the SAS 'class' statement
    for arg, value in verifiedKwargs.items():
        code += "  %s %s;\n" % ('class' if arg == 'cls' else arg, value)
    code += "run;"

    # teach_me_SAS
    if self.sas.nosub:
        print(code)
        return

    # submit the code; a truthy _is_valid() result is used in place of the
    # submit output
    ll = self.data._is_valid()

    if _output_type == 'HTML':
        if not ll:
            html = self.data.HTML
            self.data.HTML = 1
            try:
                ll = self.sas._io.submit(code)
            finally:
                # Restore the HTML setting even if the submit fails.
                self.data.HTML = html
        if not self.sas.batch:
            DISPLAY(HTML(ll['LST']))
            check, errorMsg = self.data._checkLogForError(ll['LOG'])
            if not check:
                raise ValueError("Internal code execution failed: " + errorMsg)
        else:
            return ll

    elif _output_type == 'text':
        if not ll:
            html = self.data.HTML
            self.data.HTML = 1
            try:
                ll = self.sas._io.submit(code, 'text')
            finally:
                # Restore the HTML setting even if the submit fails.
                self.data.HTML = html
        print(ll['LST'])
        return

    elif _output_type == 'Pandas':
        return self.to_nested_dataframe(code)
def tail(self, obs=5):
    """
    Display the last n rows of a table.

    A PROC SQL count query is submitted first and the row count is parsed
    from the "lastobs=... tom" line it writes to the log; the firstobs/obs
    window for the last ``obs`` rows is derived from that count.
    ``self.sas.nosub`` is switched off while this happens (presumably so the
    count query really runs even in teach-me mode) and is always restored
    afterwards.

    :param obs: the number of rows of the table that you want to display.
                The default is 5
    :return: None when the code is only printed (``self.sas.nosub``) or the
             rows are displayed; the result of ``_returnPD`` when
             ``self.results`` is 'PANDAS'; otherwise, in batch mode, the
             result of the submit (or of ``_is_valid`` when that is truthy)
    """
    code = "proc sql;select count(*) format best32. into :lastobs from " + self.libref + '.' + self.table + self._dsopts() + ";%put lastobs=&lastobs tom;quit;"

    nosub = self.sas.nosub
    self.sas.nosub = False
    try:
        # A truthy _is_valid() result replaces the submit output further down.
        le = self._is_valid()
        if not le:
            ll = self.sas.submit(code, "text")
            # Take the last "lastobs=" in the log and read up to " tom".
            count_text = ll['LOG'].rpartition("lastobs=")[2].partition(" tom")[0]
            lastobs = int(count_text)
        else:
            lastobs = obs

        # Never start before the first row when obs exceeds the row count.
        firstobs = max(lastobs - (obs - 1), 1)

        topts = dict(self.dsopts)
        topts['obs'] = lastobs
        topts['firstobs'] = firstobs

        code = "proc print data=" + self.libref + '.' + self.table + self.sas._dsopts(topts) + ";run;"
    finally:
        # Restore the caller's setting even if the count query or parsing fails.
        self.sas.nosub = nosub

    if self.sas.nosub:
        print(code)
        return

    if self.results.upper() == 'PANDAS':
        code = "data _tail ; set %s.%s %s; run;" % (
            self.libref, self.table, self.sas._dsopts(topts))
        return self._returnPD(code, '_tail')

    if self.HTML:
        if not le:
            ll = self.sas._io.submit(code)
        else:
            ll = le
        if not self.sas.batch:
            DISPLAY(HTML(ll['LST']))
        else:
            return ll
    else:
        if not le:
            ll = self.sas._io.submit(code, "text")
        else:
            ll = le
        if not self.sas.batch:
            print(ll['LST'])
        else:
            return ll