Example #1
    def handle_transmitter_merge(self, job_syntax_item, endpoint):
        opname = job_syntax_item[OP_KEY]
        csv_key_val = var_replaced(self.variables, job_syntax_item, CSV_KEY)
        csv_filename = SqlExecutor.csv_name(csv_key_val)
        csv_data = self.csvdata(csv_filename)
        num_lines = len(csv_data)
        # nothing to transmit for an empty data set; build an empty result stub
        if num_lines <= 1:
            getLogger(LOG).info('skip empty csv')
            from mriya.sf_merge_wrapper import HEADER
            result_ids = BulkData(HEADER, [])
        else:
            objname = job_syntax_item[endpoint]
            conn = self.endpoints.endpoint(endpoint)
            max_batch_size = int(job_syntax_item[BATCH_SIZE_KEY])
            getLogger(STDOUT).info('EXECUTE: %s %s, lines count=%d', opname,
                                   objname, num_lines - 1)
            t_before = time.time()
            if len(csv_data):
                result_ids = conn.soap_merge(objname, csv_data, max_batch_size)
            t_after = time.time()
            getLogger(STDOUT).info('SF %s Took time: %.2f',
                                   opname, t_after - t_before)
        if NEW_IDS_TABLE in job_syntax_item:
            results_file_name = \
                 SqlExecutor.csv_name(job_syntax_item[NEW_IDS_TABLE])
            with open(results_file_name, 'w') as result_ids_file:
                csv_data = csv_from_bulk_data(result_ids)
                result_ids_file.write(csv_data)
            getLogger(LOG).info('Saved result ids: %s', results_file_name)
        getLogger(LOG).info('Done: %s operation', opname)
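For orientation, a minimal sketch of the job_syntax_item mapping this handler consumes. Only the lookups come from the code above (OP_KEY, CSV_KEY, BATCH_SIZE_KEY, NEW_IDS_TABLE, and job_syntax_item[endpoint]); the literal values, the 'dst' endpoint name, and the commented call are illustrative assumptions, not the project's actual job syntax.

    # hypothetical values; the real key constants are defined in the mriya sources
    job_syntax_item = {
        OP_KEY: 'merge',                   # operation name used for logging
        CSV_KEY: 'accounts_to_merge',      # resolved to a file via SqlExecutor.csv_name()
        'dst': 'Account',                  # job_syntax_item[endpoint] -> Salesforce object name
        BATCH_SIZE_KEY: '200',             # parsed with int() before soap_merge()
        NEW_IDS_TABLE: 'merge_results',    # result ids are written to this csv
    }
    # self.handle_transmitter_merge(job_syntax_item, 'dst')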
Example #2
 def handle_job_item_(self, job_syntax_item):
     if job_syntax_item and QUERY_KEY in job_syntax_item:
         query = job_syntax_item[QUERY_KEY].strip()
         is_csv = CSV_KEY in job_syntax_item
         is_var = VAR_KEY in job_syntax_item
         is_cache = CACHE_KEY in job_syntax_item
         if not query:
             return
         getLogger(LOG).info(job_syntax_item)
         if not is_var and is_csv and is_cache:
             csv_key_val = var_replaced(self.variables, job_syntax_item,
                                        CSV_KEY)
             csv_name = SqlExecutor.csv_name(csv_key_val)
             csv_size = SqlExecutor.csv_size(csv_key_val)
             if csv_size and SqlExecutor.valid_csv_exist(csv_key_val):
                 getLogger(LOG).info("SKIP query: '%s', csvfile exist: %s",
                                     query, csv_name)
                 return
         sql_exec = self.create_executor(job_syntax_item)
         try:
             sql_exec.execute()
         except:
             print(job_syntax_item)
             raise
         sql_exec.saved_csv()
         self.post_operation(job_syntax_item)
         self.variables = sql_exec.variables
         del sql_exec
 def _create_script(self, variables):
     imports = ''
     if CSVLIST_KEY in self.job_syntax_item:
         for table_name in self.job_syntax_item[CSVLIST_KEY]:
             # substitute variables that may appear in the table name
             table_name = SqlExecutor.prepare_query_put_vars(
                 table_name, self.variables)
             imports += ".import {csv} {name}\n"\
                 .format(csv=self.csv_name(table_name), name=table_name)
     output = ''
     if CSV_KEY in self.job_syntax_item:
         table_name = var_replaced(variables, self.job_syntax_item, CSV_KEY)
         output += ".headers on\n"
         output += ".output {csv}\n"\
             .format(csv=self.csv_name(table_name))
         getLogger(LOG).info('working on table=%s', table_name)
     elif VAR_KEY in self.job_syntax_item:
         output += ".headers off\n"
         output += ".output stdout\n"
     elif BATCH_BEGIN_KEY in self.job_syntax_item:
         output += ".headers on\n"
         output += ".output stdout\n"
     getLogger(MOREINFO).info('EXECUTE [CSV]: %s', self.get_query())
     input_data = SQLITE_SCRIPT_FMT.format(imports=imports,
                                           output=output,
                                           query=self.get_query())
     return input_data
 def execute(self):
     t_before = time.time()
     executor = Executor()
     cmd = 'sqlite3 -batch'
     script = self._create_script(self.variables)
     getLogger(LOG).debug('Sqlite script:\n%s', script)
     executor.execute('refname', cmd, input_data=script, output_pipe=True)
     res = executor.poll_for_complete(observer)  # observer: completion callback defined elsewhere in the module
     del executor
     t_after = time.time()
     csvname = ''
     if CSV_KEY in self.job_syntax_item:
         csvname = var_replaced(self.variables, self.job_syntax_item,
                                CSV_KEY)
         self.fix_empty_res_table(csvname)
         if ismoreinfo():
             getLogger(MOREINFO).info('%s.csv - %.2fs',
                                      csvname, t_after - t_before)
         else:
             getLogger(STDOUT).info('.')
     res = res['refname']
     if res[0] != 0:
         raise Exception("Sqlite query error", self.get_query())
     else:
         self._handle_var_create(res)
         retcode = res[1]
         return retcode
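To make execute() above concrete, here is a standalone sketch of the mechanism it relies on: the dot-command script assembled by _create_script() is fed to sqlite3 -batch on stdin. The project routes this through its own Executor and poll_for_complete wrappers; subprocess below is only a stand-in, and the '.mode csv' directive is an assumption about what SQLITE_SCRIPT_FMT wraps around the imports, output, and query parts.

    # standalone sketch, not the project's Executor wrapper
    import subprocess

    script = (".headers on\n"
              ".mode csv\n"          # assumed directive from SQLITE_SCRIPT_FMT
              ".output dst_table.csv\n"
              "SELECT 1 AS a, 2 AS b;\n")
    proc = subprocess.Popen(['sqlite3', '-batch'],
                            stdin=subprocess.PIPE, universal_newlines=True)
    proc.communicate(script)
    # dst_table.csv now holds the header line "a,b" followed by the row "1,2"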
Example #5
    def handle_transmitter_op(self, job_syntax_item, endpoint):
        opname = job_syntax_item[OP_KEY]
        # run batches sequentially / parallel
        if BATCH_TYPE_KEY in job_syntax_item:
            if job_syntax_item[BATCH_TYPE_KEY] == BATCH_TYPE_PARALLEL_KEY:
                batch_seq = False
            elif job_syntax_item[BATCH_TYPE_KEY] == BATCH_TYPE_SEQUENTIAL_KEY:
                batch_seq = True
            else:
                getLogger(STDERR).error('Unknown batch type: %s',
                                        job_syntax_item[BATCH_TYPE_KEY])
                exit(1)
        else:
            batch_seq = False  # parallel by default
        csv_key_val = var_replaced(self.variables, job_syntax_item, CSV_KEY)
        csv_filename = SqlExecutor.csv_name(csv_key_val)
        csv_data = self.csvdata(csv_filename)
        num_lines = len(csv_data)
        # nothing to transmit for an empty data set; build an empty result stub
        if num_lines <= 1:
            getLogger(LOG).info('skip empty csv')
            stub = ['"Id","Success","Created","Error"\n']
            result_ids = parse_batch_res_data(stub)
        else:
            objname = job_syntax_item[endpoint]
            conn = self.endpoints.endpoint(endpoint)
            max_batch_size = int(job_syntax_item[BATCH_SIZE_KEY])
            getLogger(STDOUT).info('EXECUTE: %s %s, lines count=%d', opname,
                                   objname, num_lines - 1)
            t_before = time.time()
            if len(csv_data):
                if opname == OP_UPDATE:
                    res = conn.bulk_update(objname, csv_data, max_batch_size,
                                           batch_seq)
                elif opname == OP_DELETE:
                    res = conn.bulk_delete(objname, csv_data, max_batch_size,
                                           batch_seq)
                elif opname == OP_INSERT:
                    res = conn.bulk_insert(objname, csv_data, max_batch_size,
                                           batch_seq)
                else:
                    getLogger(STDERR).error("Operation '%s' isn't supported" %
                                            opname)
                    exit(1)

                result_ids = parse_batch_res_data(res)

            t_after = time.time()
            getLogger(STDOUT).info('SF %s Took time: %.2f',
                                   opname, t_after - t_before)
        if NEW_IDS_TABLE in job_syntax_item:
            results_file_name = \
                 SqlExecutor.csv_name(job_syntax_item[NEW_IDS_TABLE])
            with open(results_file_name, 'w') as result_ids_file:
                csv_data = csv_from_bulk_data(result_ids)
                result_ids_file.write(csv_data)
            getLogger(LOG).info('Saved result ids: %s', results_file_name)
        getLogger(LOG).info('Done: %s operation', opname)
    def handle_result(self, bulk_res):
        # handle empty result - fix it by adding column names
        if bulk_res and bulk_res[0] == EMPTY_SF_RESPONSE:
            cols = SqlExecutor.get_query_columns(self.get_query())
            header = ','.join(cols)
            bulk_res = [header]

        if len(bulk_res) > 1:
            # ignore the trailing empty result line
            bulk_res = bulk_res[:-1]

        # handle result
        if CSV_KEY in self.job_syntax_item:
            csv_key_val = var_replaced(self.variables, self.job_syntax_item,
                                       CSV_KEY)
            csvfname = SqlExecutor.csv_name(csv_key_val)
            bulk_data.save_escape_csv_lines_as_csv_file(csvfname, bulk_res)
        elif VAR_KEY in self.job_syntax_item:
            res = bulk_data.parse_batch_res_data(bulk_res)
            if res.rows:
                self.save_var(self.job_syntax_item[VAR_KEY], res.rows[0][0])
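For reference, an illustrative parser for the Salesforce bulk result lines that these handlers stub out with '"Id","Success","Created","Error"'. This is not mriya's parse_batch_res_data (the real one in mriya.bulk_data handles csv quoting and escaping); BulkDataStub and parse_result_lines are hypothetical names. It only shows the fields/rows shape that handle_result depends on when it saves res.rows[0][0] into a variable.

    from collections import namedtuple

    # illustrative stand-in for the project's BulkData structure
    BulkDataStub = namedtuple('BulkDataStub', ['fields', 'rows'])

    def parse_result_lines(lines):
        # naive comma split, for illustration only
        fields = [c.strip('"') for c in lines[0].strip().split(',')]
        rows = [tuple(c.strip('"') for c in line.strip().split(','))
                for line in lines[1:] if line.strip()]
        return BulkDataStub(fields, rows)

    stub = ['"Id","Success","Created","Error"\n',
            '"001xx000003DGb2AAG","true","true",""\n']
    res = parse_result_lines(stub)
    print(res.fields)    # ['Id', 'Success', 'Created', 'Error']
    print(res.rows[0])   # ('001xx000003DGb2AAG', 'true', 'true', '')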