def _setup():
    """Snapshot the current sampling defaults into ``backup_conf_defaults``.

    Rebinds the module-level ``backup_conf_defaults`` name to a new dict
    holding the values returned by ``conf.default_samplemethod()``,
    ``conf.default_maxrows()`` and ``conf.default_samplefraction()`` at the
    time of the call.
    """
    global backup_conf_defaults
    # Keyword order mirrors the call order of the conf accessors.
    backup_conf_defaults = dict(
        samplemethod=conf.default_samplemethod(),
        maxrows=conf.default_maxrows(),
        samplefraction=conf.default_samplefraction()
    )
Exemple #2
0
    def __init__(self,
                 output_var,
                 samplemethod=None,
                 maxrows=None,
                 samplefraction=None,
                 spark_events=None,
                 coerce=None):
        """Validate the sampling options and store them on the command.

        Each of ``samplemethod``, ``maxrows`` and ``samplefraction`` that is
        left as ``None`` is replaced by the corresponding ``conf.default_*()``
        value before validation.

        :param output_var: stored as ``self.output_var``.
        :param samplemethod: must equal ``u'take'`` or ``u'sample'``.
        :param maxrows: must be an ``int`` instance.
        :param samplefraction: must satisfy ``0.0 <= samplefraction <= 1.0``.
        :param spark_events: forwarded as given (possibly ``None``) to the
            base initializer; afterwards ``self._spark_events`` is set to it,
            or to a new ``SparkEvents()`` when it is ``None``.
        :param coerce: stored unchanged as ``self._coerce``.
        :raises BadUserDataException: if any sampling option fails
            validation, including values of an unsuitable type.
        """
        super(SparkStoreCommand, self).__init__("", spark_events)

        if samplemethod is None:
            samplemethod = conf.default_samplemethod()
        if maxrows is None:
            maxrows = conf.default_maxrows()
        if samplefraction is None:
            samplefraction = conf.default_samplefraction()

        # Tuple membership compares with ``==`` only, so an unhashable value
        # (e.g. a list) is rejected as bad user data instead of raising
        # TypeError from a set lookup.
        if samplemethod not in (u'take', u'sample'):
            raise BadUserDataException(
                u'samplemethod (-m) must be one of (take, sample)')
        if not isinstance(maxrows, int):
            raise BadUserDataException(u'maxrows (-n) must be an integer')
        # On Python 3, ordering a float against e.g. a str raises TypeError;
        # treat that as an out-of-range value so callers always see
        # BadUserDataException for bad input.
        try:
            fraction_in_range = 0.0 <= samplefraction <= 1.0
        except TypeError:
            fraction_in_range = False
        if not fraction_in_range:
            raise BadUserDataException(
                u'samplefraction (-r) must be a float between 0.0 and 1.0')

        self.samplemethod = samplemethod
        self.maxrows = maxrows
        self.samplefraction = samplefraction
        self.output_var = output_var
        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events
        self._coerce = coerce
Exemple #3
0
    def __init__(self,
                 query,
                 samplemethod=None,
                 maxrows=None,
                 samplefraction=None,
                 spark_events=None):
        """Validate the sampling options and store them with the query.

        Each of ``samplemethod``, ``maxrows`` and ``samplefraction`` that is
        left as ``None`` is replaced by the corresponding ``conf.default_*()``
        value before validation.

        :param query: stored unchanged as ``self.query``.
        :param samplemethod: must equal ``u'take'`` or ``u'sample'``.
        :param maxrows: must be an ``int`` instance.
        :param samplefraction: must satisfy ``0.0 <= samplefraction <= 1.0``.
        :param spark_events: stored as ``self._spark_events``; a new
            ``SparkEvents()`` is created when it is ``None``.
        :raises BadUserDataException: if any sampling option fails
            validation, including values of an unsuitable type.
        """
        super(SQLQuery, self).__init__()

        if samplemethod is None:
            samplemethod = conf.default_samplemethod()
        if maxrows is None:
            maxrows = conf.default_maxrows()
        if samplefraction is None:
            samplefraction = conf.default_samplefraction()

        # Tuple membership compares with ``==`` only, so an unhashable value
        # (e.g. a list) is rejected as bad user data instead of raising
        # TypeError from a set lookup.
        if samplemethod not in (u'take', u'sample'):
            raise BadUserDataException(
                u'samplemethod (-m) must be one of (take, sample)')
        if not isinstance(maxrows, int):
            raise BadUserDataException(u'maxrows (-n) must be an integer')
        # On Python 3, ordering a float against e.g. a str raises TypeError;
        # treat that as an out-of-range value so callers always see
        # BadUserDataException for bad input.
        try:
            fraction_in_range = 0.0 <= samplefraction <= 1.0
        except TypeError:
            fraction_in_range = False
        if not fraction_in_range:
            raise BadUserDataException(
                u'samplefraction (-r) must be a float between 0.0 and 1.0')

        self.query = query
        self.samplemethod = samplemethod
        self.maxrows = maxrows
        self.samplefraction = samplefraction
        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events
    def do_send_to_spark(self, cell, input_variable_name, var_type, output_variable_name, max_rows, session_name):
        """Send a variable from the local user namespace through the Spark controller.

        The value is read from ``self.shell.user_ns``, wrapped in a command
        chosen by ``var_type`` (compared case-insensitively), and handed to
        ``self.spark_controller.run_command`` with ``None`` as the second
        argument. The outcome is reported through ``self.ipython_display``.

        ``cell`` and ``session_name`` are accepted but not used in this body.

        :param input_variable_name: key to look up in the user namespace.
        :param var_type: requested type; matched against
            ``self._STRING_VAR_TYPE`` and ``self._PANDAS_DATAFRAME_VAR_TYPE``.
        :param output_variable_name: target name; falls back to
            ``input_variable_name`` when falsy.
        :param max_rows: row limit for the dataframe command; falls back to
            ``conf.default_maxrows()`` when falsy.
        :raises BadUserDataException: if the variable is missing, its value
            is ``None``, or ``var_type`` matches neither supported type.
        """
        try:
            local_value = self.shell.user_ns[input_variable_name]
        except KeyError:
            raise BadUserDataException(u'Variable named {} not found.'.format(input_variable_name))
        if local_value is None:
            raise BadUserDataException(u'Value of {} is None!'.format(input_variable_name))

        # Falsy values (None, empty string, 0) fall back to the defaults.
        output_variable_name = output_variable_name or input_variable_name
        max_rows = max_rows or conf.default_maxrows()

        requested_type = var_type.lower()
        if requested_type == self._STRING_VAR_TYPE:
            command = SendStringToSparkCommand(input_variable_name, local_value, output_variable_name)
        elif requested_type == self._PANDAS_DATAFRAME_VAR_TYPE:
            command = SendPandasDfToSparkCommand(input_variable_name, local_value, output_variable_name, max_rows)
        else:
            allowed_types = u','.join(self._ALLOWED_LOCAL_TO_SPARK_TYPES)
            raise BadUserDataException(u'Invalid or incorrect -t type. Available are: [{}]'.format(allowed_types))

        # The third element (mime type) of the result is not needed here.
        success, result, _ = self.spark_controller.run_command(command, None)
        if success:
            self.ipython_display.write(u'Successfully passed \'{}\' as \'{}\' to Spark'
                                       u' kernel'.format(input_variable_name, output_variable_name))
        else:
            self.ipython_display.send_error(result)
Exemple #5
0
    def __init__(self, query, samplemethod=None, maxrows=None, samplefraction=None, spark_events=None):
        """Validate the sampling options and store them with the query.

        Each of ``samplemethod``, ``maxrows`` and ``samplefraction`` that is
        left as ``None`` is replaced by the corresponding ``conf.default_*()``
        value before validation.

        :param query: stored unchanged as ``self.query``.
        :param samplemethod: must equal ``u'take'`` or ``u'sample'``.
        :param maxrows: must be an ``int`` instance.
        :param samplefraction: must satisfy ``0.0 <= samplefraction <= 1.0``.
        :param spark_events: stored as ``self._spark_events``; a new
            ``SparkEvents()`` is created when it is ``None``.
        :raises BadUserDataException: if any sampling option fails
            validation, including values of an unsuitable type.
        """
        super(SQLQuery, self).__init__()

        if samplemethod is None:
            samplemethod = conf.default_samplemethod()
        if maxrows is None:
            maxrows = conf.default_maxrows()
        if samplefraction is None:
            samplefraction = conf.default_samplefraction()

        # Tuple membership compares with ``==`` only, so an unhashable value
        # (e.g. a list) is rejected as bad user data instead of raising
        # TypeError from a set lookup.
        if samplemethod not in (u'take', u'sample'):
            raise BadUserDataException(u'samplemethod (-m) must be one of (take, sample)')
        if not isinstance(maxrows, int):
            raise BadUserDataException(u'maxrows (-n) must be an integer')
        # On Python 3, ordering a float against e.g. a str raises TypeError;
        # treat that as an out-of-range value so callers always see
        # BadUserDataException for bad input.
        try:
            fraction_in_range = 0.0 <= samplefraction <= 1.0
        except TypeError:
            fraction_in_range = False
        if not fraction_in_range:
            raise BadUserDataException(u'samplefraction (-r) must be a float between 0.0 and 1.0')

        self.query = query
        self.samplemethod = samplemethod
        self.maxrows = maxrows
        self.samplefraction = samplefraction
        if spark_events is None:
            spark_events = SparkEvents()
        self._spark_events = spark_events