def _setup():
    """Snapshot the current sampling defaults into ``backup_conf_defaults``.

    Stores the configured samplemethod / maxrows / samplefraction defaults
    in a module-level dict so they can be restored later (e.g. by tests).
    """
    global backup_conf_defaults
    backup_conf_defaults = {
        'samplemethod': conf.default_samplemethod(),
        'maxrows': conf.default_maxrows(),
        'samplefraction': conf.default_samplefraction(),
    }
def __init__(self, output_var, samplemethod=None, maxrows=None,
             samplefraction=None, spark_events=None, coerce=None):
    """Build a command that stores a Spark variable locally as ``output_var``.

    Parameters
    ----------
    output_var : str
        Name of the local variable the results are stored under.
    samplemethod : str, optional
        One of ``'take'`` or ``'sample'``; defaults to
        ``conf.default_samplemethod()``.
    maxrows : int, optional
        Maximum number of rows to fetch; defaults to
        ``conf.default_maxrows()``.
    samplefraction : float, optional
        Sampling fraction in [0.0, 1.0]; defaults to
        ``conf.default_samplefraction()``.
    spark_events : SparkEvents, optional
        Event emitter; a fresh ``SparkEvents()`` is created when ``None``.
    coerce : optional
        Stored on ``self._coerce``; semantics defined by the consumer —
        TODO confirm at call sites.

    Raises
    ------
    BadUserDataException
        If any sampling option is invalid.
    """
    # Fix: resolve the SparkEvents fallback BEFORE calling super() so the
    # base class and self._spark_events reference the same object.  The
    # original passed the (possibly None) argument to super() and only
    # afterwards created the fallback instance, leaving the two out of sync.
    if spark_events is None:
        spark_events = SparkEvents()
    super(SparkStoreCommand, self).__init__("", spark_events)
    # Fill unset sampling options from configuration.
    if samplemethod is None:
        samplemethod = conf.default_samplemethod()
    if maxrows is None:
        maxrows = conf.default_maxrows()
    if samplefraction is None:
        samplefraction = conf.default_samplefraction()
    # Validate user-supplied sampling options.
    if samplemethod not in {u'take', u'sample'}:
        raise BadUserDataException(
            u'samplemethod (-m) must be one of (take, sample)')
    if not isinstance(maxrows, int):
        raise BadUserDataException(u'maxrows (-n) must be an integer')
    if not 0.0 <= samplefraction <= 1.0:
        raise BadUserDataException(
            u'samplefraction (-r) must be a float between 0.0 and 1.0')
    self.samplemethod = samplemethod
    self.maxrows = maxrows
    self.samplefraction = samplefraction
    self.output_var = output_var
    self._spark_events = spark_events
    self._coerce = coerce
def __init__(self, query, samplemethod=None, maxrows=None,
             samplefraction=None, spark_events=None):
    """Represent a SQL query together with its sampling configuration.

    Unset options fall back to the configured defaults; invalid options
    raise ``BadUserDataException``.
    """
    super(SQLQuery, self).__init__()
    # Fill unset options from configuration (same evaluation order as conf).
    method = conf.default_samplemethod() if samplemethod is None else samplemethod
    rows = conf.default_maxrows() if maxrows is None else maxrows
    fraction = conf.default_samplefraction() if samplefraction is None else samplefraction
    # Validate user-supplied sampling options.
    if method not in {u'take', u'sample'}:
        raise BadUserDataException(
            u'samplemethod (-m) must be one of (take, sample)')
    if not isinstance(rows, int):
        raise BadUserDataException(u'maxrows (-n) must be an integer')
    if not 0.0 <= fraction <= 1.0:
        raise BadUserDataException(
            u'samplefraction (-r) must be a float between 0.0 and 1.0')
    self.query = query
    self.samplemethod = method
    self.maxrows = rows
    self.samplefraction = fraction
    self._spark_events = SparkEvents() if spark_events is None else spark_events
def do_send_to_spark(self, cell, input_variable_name, var_type,
                     output_variable_name, max_rows, session_name):
    """Push a local notebook variable into the Spark session.

    Looks up ``input_variable_name`` in the local user namespace, builds the
    appropriate send command for its declared type (string or pandas
    DataFrame), runs it through the spark controller, and reports the
    outcome on the display.

    Raises
    ------
    BadUserDataException
        If the variable is missing, is None, or ``var_type`` is unsupported.
    """
    # Guard: the local variable must exist and be non-None.
    try:
        local_value = self.shell.user_ns[input_variable_name]
    except KeyError:
        raise BadUserDataException(
            u'Variable named {} not found.'.format(input_variable_name))
    if local_value is None:
        raise BadUserDataException(
            u'Value of {} is None!'.format(input_variable_name))

    # Default the remote name to the local one, and the row cap to config.
    if not output_variable_name:
        output_variable_name = input_variable_name
    if not max_rows:
        max_rows = conf.default_maxrows()

    # Dispatch on the declared variable type.
    kind = var_type.lower()
    if kind == self._STRING_VAR_TYPE:
        command = SendStringToSparkCommand(
            input_variable_name, local_value, output_variable_name)
    elif kind == self._PANDAS_DATAFRAME_VAR_TYPE:
        command = SendPandasDfToSparkCommand(
            input_variable_name, local_value, output_variable_name, max_rows)
    else:
        raise BadUserDataException(
            u'Invalid or incorrect -t type. Available are: [{}]'.format(
                u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)))

    (success, result, mime_type) = self.spark_controller.run_command(command, None)
    if success:
        self.ipython_display.write(
            u"Successfully passed '{}' as '{}' to Spark kernel".format(
                input_variable_name, output_variable_name))
    else:
        self.ipython_display.send_error(result)
def __init__(self, query, samplemethod=None, maxrows=None,
             samplefraction=None, spark_events=None):
    """Initialize a SQLQuery with sampling options validated against conf.

    Any option left as ``None`` is replaced by the corresponding configured
    default before validation.
    """
    super(SQLQuery, self).__init__()
    # Apply configured defaults for anything the caller left unset.
    if samplemethod is None:
        samplemethod = conf.default_samplemethod()
    if maxrows is None:
        maxrows = conf.default_maxrows()
    if samplefraction is None:
        samplefraction = conf.default_samplefraction()
    # Reject invalid sampling configuration up front.
    allowed_methods = {u'take', u'sample'}
    if samplemethod not in allowed_methods:
        raise BadUserDataException(
            u'samplemethod (-m) must be one of (take, sample)')
    if not isinstance(maxrows, int):
        raise BadUserDataException(
            u'maxrows (-n) must be an integer')
    if not 0.0 <= samplefraction <= 1.0:
        raise BadUserDataException(
            u'samplefraction (-r) must be a float between 0.0 and 1.0')
    self.query = query
    self.samplemethod = samplemethod
    self.maxrows = maxrows
    self.samplefraction = samplefraction
    if spark_events is None:
        spark_events = SparkEvents()
    self._spark_events = spark_events