Example #1
 def handle_job_item_(self, job_syntax_item):
     if job_syntax_item and QUERY_KEY in job_syntax_item:
         query = job_syntax_item[QUERY_KEY].strip()
         is_csv = CSV_KEY in job_syntax_item
         is_var = VAR_KEY in job_syntax_item
         is_cache = CACHE_KEY in job_syntax_item
         if not query:
             return
         getLogger(LOG).info(job_syntax_item)
         if not is_var and is_csv and is_cache:
             csv_key_val = var_replaced(self.variables, job_syntax_item,
                                        CSV_KEY)
             csv_name = SqlExecutor.csv_name(csv_key_val)
             csv_size = SqlExecutor.csv_size(csv_key_val)
             if csv_size and SqlExecutor.valid_csv_exist(csv_key_val):
                 getLogger(LOG).info("SKIP query: '%s', csvfile exist: %s",
                                     query, csv_name)
                 return
         sql_exec = self.create_executor(job_syntax_item)
         try:
             sql_exec.execute()
         except:
             print job_syntax_item
             raise
         sql_exec.saved_csv()
         self.post_operation(job_syntax_item)
         self.variables = sql_exec.variables
         del sql_exec
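
A minimal, hypothetical sketch of the cache decision made above: a csv-producing item marked as cacheable is skipped when a non-empty csv for it already exists on disk (the 'csv'/'var' key names match the parsed items shown in the tests further down; the 'cache' key value and the file check are assumptions standing in for SqlExecutor's helpers).

import os

CSV_KEY, VAR_KEY, CACHE_KEY = 'csv', 'var', 'cache'  # CACHE_KEY value assumed

def should_skip_cached(job_syntax_item, csv_path):
    # variable assignments are always (re)evaluated
    if VAR_KEY in job_syntax_item:
        return False
    if CSV_KEY not in job_syntax_item or CACHE_KEY not in job_syntax_item:
        return False
    # stands in for SqlExecutor.csv_size / valid_csv_exist: non-empty file counts
    return os.path.isfile(csv_path) and os.path.getsize(csv_path) > 0

assert not should_skip_cached({'query': 'SELECT 1;', 'var': 'one'}, '/tmp/x.csv')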
Example #2
    def handle_transmitter_merge(self, job_syntax_item, endpoint):
        opname = job_syntax_item[OP_KEY]
        csv_key_val = var_replaced(self.variables, job_syntax_item, CSV_KEY)
        csv_filename = SqlExecutor.csv_name(csv_key_val)
        csv_data = self.csvdata(csv_filename)
        num_lines = len(csv_data)
        # do nothing for empty data set
        if num_lines <= 1:
            getLogger(LOG).info('skip empty csv')
            from mriya.sf_merge_wrapper import HEADER
            result_ids = BulkData(HEADER, [])
        else:
            objname = job_syntax_item[endpoint]
            conn = self.endpoints.endpoint(endpoint)
            max_batch_size = int(job_syntax_item[BATCH_SIZE_KEY])
            getLogger(STDOUT).info('EXECUTE: %s %s, lines count=%d', opname,
                                   objname, num_lines - 1)
            t_before = time.time()
            if len(csv_data):

                result_ids = conn.soap_merge(objname, csv_data, max_batch_size)
            t_after = time.time()
            getLogger(STDOUT).info('SF %s Took time: %.2f' \
                                   % (opname, t_after-t_before))
        if NEW_IDS_TABLE in job_syntax_item:
            results_file_name = \
                 SqlExecutor.csv_name(job_syntax_item[NEW_IDS_TABLE])
            with open(results_file_name, 'w') as result_ids_file:
                csv_data = csv_from_bulk_data(result_ids)
                result_ids_file.write(csv_data)
            getLogger(LOG).info('Saved result ids: %s', results_file_name)
        getLogger(LOG).info('Done: %s operation', opname)
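
BulkData and csv_from_bulk_data are project helpers; judging from how they are used here and in the tests below (csv_data.fields / csv_data.rows), they behave roughly like this hypothetical sketch (any quoting/escaping the real helper performs is ignored).

from collections import namedtuple

BulkData = namedtuple('BulkData', ['fields', 'rows'])  # assumed shape

def csv_from_bulk_data(bulk_data):
    # header row followed by one comma-separated line per data row
    lines = [','.join(bulk_data.fields)]
    for row in bulk_data.rows:
        lines.append(','.join(row))
    return '\n'.join(lines) + '\n'

assert csv_from_bulk_data(BulkData(['Id', 'Success'], [['001', 'true']])) \
    == 'Id,Success\n001,true\n'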
 def fix_empty_res_table(self, table_name):
     size = SqlExecutor.csv_size(table_name)
     if size == 0:
         getLogger(LOG).debug("Fix empty csv for table %s", table_name)
         cols = SqlExecutor.get_query_columns(self.get_query())
         header = ','.join(cols) + '\n'
         with open(self.csv_name(table_name), 'w') as f:
             f.write(header)
Example #4
    def handle_transmitter_op(self, job_syntax_item, endpoint):
        opname = job_syntax_item[OP_KEY]
        # run batches either sequentially or in parallel
        if BATCH_TYPE_KEY in job_syntax_item:
            if job_syntax_item[BATCH_TYPE_KEY] == BATCH_TYPE_PARALLEL_KEY:
                batch_seq = False
            elif job_syntax_item[BATCH_TYPE_KEY] == BATCH_TYPE_SEQUENTIAL_KEY:
                batch_seq = True
            else:
                getLogger(STDERR).error('Unknown batch type: %s',
                                        job_syntax_item[BATCH_TYPE_KEY])
                exit(1)
        else:
            batch_seq = False  # parallel by default
        csv_key_val = var_replaced(self.variables, job_syntax_item, CSV_KEY)
        csv_filename = SqlExecutor.csv_name(csv_key_val)
        csv_data = self.csvdata(csv_filename)
        num_lines = len(csv_data)
        # do nothing for empty data set
        if num_lines <= 1:
            getLogger(LOG).info('skip empty csv')
            stub = ['"Id","Success","Created","Error"\n']
            result_ids = parse_batch_res_data(stub)
        else:
            objname = job_syntax_item[endpoint]
            conn = self.endpoints.endpoint(endpoint)
            max_batch_size = int(job_syntax_item[BATCH_SIZE_KEY])
            getLogger(STDOUT).info('EXECUTE: %s %s, lines count=%d', opname,
                                   objname, num_lines - 1)
            t_before = time.time()
            if len(csv_data):
                if opname == OP_UPDATE:
                    res = conn.bulk_update(objname, csv_data, max_batch_size,
                                           batch_seq)
                elif opname == OP_DELETE:
                    res = conn.bulk_delete(objname, csv_data, max_batch_size,
                                           batch_seq)
                elif opname == OP_INSERT:
                    res = conn.bulk_insert(objname, csv_data, max_batch_size,
                                           batch_seq)
                else:
                    getLogger(STDERR).error("Operation '%s' isn't supported" %
                                            opname)
                    exit(1)

                result_ids = parse_batch_res_data(res)

            t_after = time.time()
            getLogger(STDOUT).info('SF %s Took time: %.2f' \
                                   % (opname, t_after-t_before))
        if NEW_IDS_TABLE in job_syntax_item:
            results_file_name = \
                 SqlExecutor.csv_name(job_syntax_item[NEW_IDS_TABLE])
            with open(results_file_name, 'w') as result_ids_file:
                csv_data = csv_from_bulk_data(result_ids)
                result_ids_file.write(csv_data)
            getLogger(LOG).info('Saved result ids: %s', results_file_name)
        getLogger(LOG).info('Done: %s operation', opname)
 def _create_script(self, variables):
     imports = ''
     if CSVLIST_KEY in self.job_syntax_item:
         for table_name in self.job_syntax_item[CSVLIST_KEY]:
             # check if the table name contains vars
             table_name = SqlExecutor.prepare_query_put_vars(
                 table_name, self.variables)
             imports += ".import {csv} {name}\n"\
                 .format(csv=self.csv_name(table_name), name=table_name)
     output = ''
     if CSV_KEY in self.job_syntax_item:
         table_name = var_replaced(variables, self.job_syntax_item, CSV_KEY)
         output += ".headers on\n"
         output += ".output {csv}\n"\
             .format(csv=self.csv_name(table_name))
         getLogger(LOG).info('working on table=%s', table_name)
     elif VAR_KEY in self.job_syntax_item:
         output += ".headers off\n"
         output += ".output stdout\n"
     elif BATCH_BEGIN_KEY in self.job_syntax_item:
         output += ".headers on\n"
         output += ".output stdout\n"
     getLogger(MOREINFO).info('EXECUTE [CSV]: %s', self.get_query())
     input_data = SQLITE_SCRIPT_FMT.format(imports=imports,
                                           output=output,
                                           query=self.get_query())
     return input_data
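
The string returned here is fed to the sqlite3 shell. SQLITE_SCRIPT_FMT itself is defined elsewhere in the project; assuming it simply concatenates the three parts, the generated script would look roughly like this:

# hypothetical format string; the project's real one may add e.g. '.mode csv'
SQLITE_SCRIPT_FMT = "{imports}{output}{query}\n"

imports = ".import /tmp/accounts.csv accounts\n"
output = ".headers on\n.output /tmp/result.csv\n"
query = "SELECT Id, Name FROM accounts;"

script = SQLITE_SCRIPT_FMT.format(imports=imports, output=output, query=query)
assert script == (".import /tmp/accounts.csv accounts\n"
                  ".headers on\n"
                  ".output /tmp/result.csv\n"
                  "SELECT Id, Name FROM accounts;\n")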
    def get_query(self):
        if not self.query:
            self.query = SqlExecutor.prepare_query_put_vars(
                self.job_syntax_item[QUERY_KEY], self.variables)
            # get csvlist after variable substitution
            values = JobSyntax.parse_query_params(self.get_query(), {})
            if CSVLIST_KEY in values:
                getLogger(LOG).debug('Fix csvlist: %s', values[CSVLIST_KEY])
                if CSVLIST_KEY in self.job_syntax_item:
                    tmp = self.job_syntax_item[CSVLIST_KEY]
                    tmp.extend(values[CSVLIST_KEY])
                    # filter out duplicate items
                    self.job_syntax_item[CSVLIST_KEY] = list(Set(tmp))
                else:
                    self.job_syntax_item[CSVLIST_KEY] = values[CSVLIST_KEY]

            if CONST_KEY not in self.job_syntax_item:
                self.query = self.query.replace(CSV_KEY + '.', '')
                self.query = self.query.replace(DST_KEY + '.', '')
                self.query = self.query.replace(SRC_KEY + '.', '')

            # append a trailing ';' if the query does not already end with one
            if self.query and self.query[-1] != ';':
                self.query += ';'
        return self.query
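
A small illustration of the rewriting above: the csv./dst./src. routing prefixes are stripped before the query reaches sqlite, and a terminating ';' is appended (prefix values as they appear in the test data below).

query = "SELECT Id FROM csv.sfvar"
for prefix in ('csv.', 'dst.', 'src.'):  # CSV_KEY, DST_KEY, SRC_KEY + '.'
    query = query.replace(prefix, '')
if query and query[-1] != ';':
    query += ';'
assert query == "SELECT Id FROM sfvar;"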
def test_job_controller(mock_docall, m):
    # mock setup
    sf_bulk_connector.JOB_CHECK_TIMER = 0
    mockers.mock_job_controller(mock_docall, m)
    # test itself
    setdatadir(tempfile.mkdtemp())
    # test debug coverage
    mriya.log.INITIALIZED_LOGGERS = {}
    mriya.log.LOGGING_LEVEL = logging.DEBUG
    loginit(__name__)
    print "test_job_controller"

    # prepare test_csv.csv
    test_csv = ['"Alexa__c"', '"hello\n\n2"']
    with open(SqlExecutor.csv_name('test_csv'), "w") as test_csv_f:
        test_csv_f.write('"Alexa__c"\n"hello\n\n2"\n')

    notch = 'test1234567'
    with open('tests/sql/complicated.sql') as sql_f:
        lines = sql_f.readlines()
    job_syntax = JobSyntaxExtended(lines)
    with open(config_filename) as conf_file:
        job_controller = JobController(conf_file.name, endpoint_names,
                                       job_syntax, {}, False)
    job_controller.run_job()
    del job_controller
    # check resulted data
    with open(SqlExecutor.csv_name('some_data_staging')) as resulted_file:
        csv_data = get_bulk_data_from_csv_stream(resulted_file)
        name_idx = csv_data.fields.index('Name')
        assert 1 == len(csv_data.rows)
        assert csv_data.rows[0][name_idx] == notch
    with open(SqlExecutor.csv_name('newids')) as newids_file:
        csv_data = get_bulk_data_from_csv_stream(newids_file)
        id_idx = csv_data.fields.index('Id')
        try:
            assert 1 == len(csv_data.rows)
        except:
            print "len(csv_data.rows)", len(csv_data.rows)
            raise
        for row in csv_data.rows:
            assert len(row[id_idx]) >= 15

    assert open(SqlExecutor.csv_name('test_csv')).read() \
        == open(SqlExecutor.csv_name('test_csv_2')).read()
    # run another test using created data
    run_test_graph(datadir(), "tests/sql/complicated.sql")
    def handle_result(self, bulk_res):
        # handle empty result - fix it by adding column names
        if bulk_res and bulk_res[0] == EMPTY_SF_RESPONSE:
            cols = SqlExecutor.get_query_columns(self.get_query())
            header = ','.join(cols)
            bulk_res = [header]

        if len(bulk_res) > 1:
            # drop the trailing empty line of the bulk result
            bulk_res = bulk_res[:-1]

        # handle result
        if CSV_KEY in self.job_syntax_item:
            csv_key_val = var_replaced(self.variables, self.job_syntax_item,
                                       CSV_KEY)
            csvfname = SqlExecutor.csv_name(csv_key_val)
            bulk_data.save_escape_csv_lines_as_csv_file(csvfname, bulk_res)
        elif VAR_KEY in self.job_syntax_item:
            res = bulk_data.parse_batch_res_data(bulk_res)
            if res.rows:
                self.save_var(self.job_syntax_item[VAR_KEY], res.rows[0][0])
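
A hypothetical illustration of the empty-result fix above: when the bulk API answer is only the 'no records' marker, the result is rebuilt from the query's column list so that a header row still gets written (the EMPTY_SF_RESPONSE value below is an assumption).

EMPTY_SF_RESPONSE = 'Records not found for this query'  # assumed value

bulk_res = [EMPTY_SF_RESPONSE]
cols = ['Id', 'Name']  # stands in for SqlExecutor.get_query_columns(...)
if bulk_res and bulk_res[0] == EMPTY_SF_RESPONSE:
    bulk_res = [','.join(cols)]
assert bulk_res == ['Id,Name']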
 def get_query(self):
     if not self.query:
         self.query = SqlExecutor.prepare_query_put_vars(
             self.job_syntax_item[QUERY_KEY], self.variables)
         if CONST_KEY not in self.job_syntax_item:
             self.query = self.query.replace(CSV_KEY + '.', '')
             self.query = self.query.replace(DST_KEY + '.', '')
             self.query = self.query.replace(SRC_KEY + '.', '')
         # get rid of trailing ';' in query
         if self.query and self.query[-1] == ';':
             self.query = self.query[:-1]
     return self.query
Example #10
def test_columns():
    loginit(__name__)
    query = "SELECT foo, (SELECT 1 WHERE 0=1) as foo2, foo as foo3, \
a.foo4 FROM "

    res = SqlExecutor.get_query_columns(query)
    try:
        assert (res == ['foo', 'foo2', 'foo3', 'foo4'])
    except:
        print res
        raise

    query = " SELECT Id as id2, \
b.bomba, (SELECT a.Id FROM foo a WHERE a.nope = b.Id) as id__c, \
(SELECT a.Id from foo2 a WHERE a.ggg = b.nnn) as super \
FROM sometable b WHERE ..."

    res = SqlExecutor.get_query_columns(query)
    try:
        assert (res == ['id2', 'bomba', 'id__c', 'super'])
    except:
        print res
        raise
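
SqlExecutor.get_query_columns itself is not among these examples; a simplified, hypothetical sketch that reproduces the behaviour the assertions above expect (paren-aware splitting, then alias or table-prefix stripping) could look like this — the project's real implementation may differ.

def get_query_columns_sketch(query):
    # assumes the query starts with an upper-case SELECT
    body = query.strip()[len('SELECT'):]
    upper = body.upper()
    # cut at the first FROM that is not nested inside parentheses
    depth, cut = 0, len(body)
    for i, ch in enumerate(body):
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        elif depth == 0 and upper.startswith(' FROM', i) \
                and (i + 5 == len(body) or body[i + 5].isspace()):
            cut = i
            break
    body = body[:cut]
    # split the select list on commas that are not nested inside parentheses
    cols, depth, start = [], 0, 0
    for i, ch in enumerate(body):
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        elif ch == ',' and depth == 0:
            cols.append(body[start:i])
            start = i + 1
    cols.append(body[start:])
    names = []
    for col in cols:
        col = col.strip()
        lower = col.lower()
        if ' as ' in lower:
            names.append(col[lower.rindex(' as ') + 4:].strip())
        else:
            names.append(col.split('.')[-1])  # drop a table alias prefix
    return names

assert get_query_columns_sketch(
    "SELECT foo, (SELECT 1 WHERE 0=1) as foo2, foo as foo3, a.foo4 FROM ") \
    == ['foo', 'foo2', 'foo3', 'foo4']
assert get_query_columns_sketch(
    " SELECT Id as id2, b.bomba, "
    "(SELECT a.Id FROM foo a WHERE a.nope = b.Id) as id__c, "
    "(SELECT a.Id from foo2 a WHERE a.ggg = b.nnn) as super "
    "FROM sometable b WHERE ...") == ['id2', 'bomba', 'id__c', 'super']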
Example #11
 def run_job(self):
     batch_items = None
     batch = None
     for job_syntax_item in self.job_syntax:
         if self.debug_steps:
             print "NEXT SQL:", SqlExecutor.prepare_query_put_vars(
                 job_syntax_item['line'], self.variables)
             print "continue execution? y/n"
             if not self.step_by_step():
                 exit(0)
         if BATCH_KEY in job_syntax_item:
             self.run_in_loop(job_syntax_item)
         else:
             self.handle_job_item_(job_syntax_item)
Example #12
 def __init__(self, config_filename, endpoint_names, job_syntax, variables,
              debug_steps):
     #loginit(__name__)
     self.config = None
     if config_filename:
         self.config_file = open(config_filename)
         self.config = ConfigParser()
         self.config.read_file(self.config_file)
     self.job_syntax = job_syntax
     self.endpoints = Endpoints(self.config, endpoint_names)
     self.variables = variables
     self.debug_steps = debug_steps
     # create csv file for an internal batch purpose
     ints1000_csv = SqlExecutor.csv_name(INTS_TABLE)
     with open(ints1000_csv, 'w') as ints:
         ints.write('i\n')
         for i in xrange(10001):
             ints.write('%d\n' % i)
         ints.flush()
Example #13
def add_item_to_graph(item_x, idx, graph_nodes, csvdir, aggregated_csvs):
    edges = []
    # get csv relations
    if CSVLIST_KEY in item_x:
        edges.extend(item_x[CSVLIST_KEY])
    elif OBJNAME_KEY in item_x:
        node_name = item_x[FROM_KEY] + '.' + item_x[OBJNAME_KEY]
        edges.append(node_name)
        graph_nodes[node_name] = GraphNodeData(id=idx, edges=[],
                                               shape=SHAPE_BOX,
                                               color=COLOR_GREEN,
                                               style='',
                                               info=EXTERNAL_OBJECT_READ,
                                               href='')
        idx = idx + 1
    # get var relations
    if QUERY_KEY in item_x:
        edges.extend(SqlExecutor.get_query_var_names(item_x[QUERY_KEY]))

    csvhref, nodeinfo = ('','')

    # var nodes
    if VAR_KEY in item_x:
        node_name = item_x[VAR_KEY]
        color=COLOR_GREEN
        csvhref, nodeinfo = get_href_info(item_x, node_name, csvdir, aggregated_csvs)
        nodeinfo += 'query: %s' % item_x[LINE_KEY]
        if PUBLISH_KEY in item_x:
            color=COLOR_RED
        if node_name in graph_nodes:
            edges.extend(graph_nodes[node_name].edges)
        graph_nodes[node_name] = GraphNodeData(id=idx, edges=list(Set(edges)),
                                               shape=SHAPE_ELLIPSE,
                                               style=STYLE_DASHED,
                                               color=color,
                                               info=nodeinfo,
                                               href=csvhref)
        print '%s : "%s"\n' % (item_x[VAR_KEY], item_x[LINE_KEY])
        idx = idx + 1
    # csv nodes  
    elif CSV_KEY in item_x:
        node_name = item_x[CSV_KEY]
        csvhref, nodeinfo = get_href_info(item_x, node_name, csvdir, aggregated_csvs)
        nodeinfo += 'query: %s' % item_x[LINE_KEY]
        if node_name in graph_nodes:
            edges.extend(graph_nodes[node_name].edges)
        graph_nodes[node_name] = GraphNodeData(id=idx, edges=list(Set(edges)),
                                               shape=SHAPE_ELLIPSE,
                                               color='',
                                               style='',
                                               info=nodeinfo,
                                               href=csvhref)
        print '%s : "%s"\n' % (item_x[CSV_KEY], item_x[LINE_KEY])
        if OP_KEY in item_x:
            idx = idx + 1
            if DST_KEY in item_x:
                node2_name = 'dst:%s:%s' % (item_x[OP_KEY],
                                            item_x[DST_KEY])
            elif SRC_KEY in item_x:
                node2_name = 'src:%s:%s' % (item_x[OP_KEY],
                                            item_x[SRC_KEY])
            while node2_name in graph_nodes:
                node2_name += '.'
            graph_nodes[node2_name] \
                = GraphNodeData(id=idx, edges=[node_name],
                                shape=SHAPE_BOX, color=COLOR_RED,
                                style='',
                                info=EXTERNAL_OBJECT_WRITE,
                                href='')
            idx = idx + 1
            # add node as result of operation
            node3_name = item_x[NEW_IDS_TABLE]
            csvhref, nodeinfo = get_href_info(item_x, node3_name, csvdir,
                                              aggregated_csvs)
            nodeinfo += EXTERNAL_OBJECT_RESULT
            graph_nodes[node3_name] \
                = GraphNodeData(id=idx, edges=[node2_name],
                                shape=SHAPE_ELLIPSE, color='',
                                style='',
                                info=nodeinfo,
                                href=csvhref)
            
    idx = idx + 1
    return (idx, graph_nodes)
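
GraphNodeData is constructed with the same keyword arguments throughout, which is consistent with a plain namedtuple; a hypothetical sketch (the real definition, and the SHAPE_*/COLOR_*/STYLE_* constants, live in the project's graph module):

from collections import namedtuple

GraphNodeData = namedtuple(
    'GraphNodeData',
    ['id', 'edges', 'shape', 'color', 'style', 'info', 'href'])

# shape/color/style values are presumably graphviz attributes, e.g.:
node = GraphNodeData(id=0, edges=['src.Account'], shape='box', color='green',
                     style='', info='external object is read here', href='')
assert node.shape == 'box' and node.edges == ['src.Account']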
Example #14
def test_var_csv(mock_docall, m):
    # mock setup
    sf_bulk_connector.JOB_CHECK_TIMER = 0
    mockers.mock_var_csv(mock_docall, m)
    # test itself
    setdatadir(tempfile.mkdtemp())
    loginit(__name__)
    print "test_var_csv"
    macro_lines = [
        'SELECT i from csv.ints10000 WHERE i>=CAST(10 as INTEGER) \
LIMIT 2; => batch_begin:i:NESTED', 'SELECT {NESTED}; => var:foo2',
        '=> batch_end:NESTED', "SELECT '{STATIC_VAR}'; => var:static_var"
    ]

    lines = [
        'SELECT 1; => var:one',
        "SELECT 'csv.ints10000'; => var:CSV_INTS10000 => const:",
        "SELECT * FROM {CSV_INTS10000} LIMIT 1; => var:VAR_0",
        'SELECT "9+0"; => var:nine',
        'SELECT Id FROM src.Account LIMIT 1 => csv:sfvar',
        'SELECT * FROM csv.sfvar => var:sfvar',
        'SELECT {one} as f1, {nine}+1 as f2; => csv:one_ten',
        "SELECT 'one_nine_ten' => var:ONENINETEN",
        'SELECT f1, {nine} as f9, (SELECT f2 FROM csv.one_ten) as f10 \
FROM csv.one_ten; => csv:{ONENINETEN}',
        'SELECT i from csv.ints10000 WHERE i>=2 LIMIT 2; \
=> batch_begin:i:PARAM', 'SELECT {PARAM}; => var:foo',
        '=> macro:macro_test_batch:STATIC_VAR:something', '=> batch_end:PARAM',
        'SELECT {PARAM}; => var:final_test'
    ]

    expected = [{
        'from': 'csv',
        'query': 'SELECT 1;',
        'var': 'one'
    }, {
        'from': 'csv',
        'query': "SELECT 'csv.ints10000';",
        'var': 'CSV_INTS10000',
        'const': ''
    }, {
        'from': 'csv',
        'query': "SELECT * FROM {CSV_INTS10000} LIMIT 1;",
        'var': 'VAR_0'
    }, {
        'from': 'csv',
        'query': 'SELECT "9+0";',
        'var': 'nine'
    }, {
        'objname': 'Account',
        'query': 'SELECT Id FROM Account LIMIT 1',
        'csv': 'sfvar',
        'from': 'src'
    }, {
        'query': 'SELECT * FROM sfvar',
        'var': 'sfvar',
        'csvlist': ['sfvar'],
        'from': 'csv'
    }, {
        'query': 'SELECT {one} as f1, {nine}+1 as f2;',
        'csv': 'one_ten'
    }, {
        'from': 'csv',
        'query': "SELECT 'one_nine_ten'",
        'var': 'ONENINETEN'
    }, {
        'query': 'SELECT f1, {nine} as f9, (SELECT f2 FROM \
one_ten) as f10 FROM one_ten;',
        'csvlist': ['one_ten'],
        'csv': '{ONENINETEN}',
        'from': 'csv'
    }, {
        'batch': [{
            'line': 'SELECT {PARAM}; => var:foo',
            'query': 'SELECT {PARAM};',
            'var': 'foo',
            'from': 'csv'
        }, {
            'batch': [{
                'line': 'SELECT {NESTED}; => var:foo2',
                'query': 'SELECT {NESTED};',
                'var': 'foo2',
                'from': 'csv'
            }],
            'batch_begin': ('i', 'NESTED'),
            'csvlist': ['ints10000'],
            'from':
            'csv',
            'line':
            'SELECT i from csv.ints10000 WHERE i>=CAST(10 as INTEGER) LIMIT 2; => batch_begin:i:NESTED',
            'query':
            'SELECT i from ints10000 WHERE i>=CAST(10 as INTEGER) LIMIT 2;'
        }, {
            'line': "SELECT 'something'; => var:static_var",
            'query': "SELECT 'something';",
            'var': 'static_var',
            'from': 'csv'
        }],
        'batch_begin': ('i', 'PARAM'),
        'csvlist': ['ints10000'],
        'from':
        'csv',
        'query':
        'SELECT i from ints10000 WHERE i>=2 LIMIT 2;'
    }, {
        'from': 'csv',
        'query': 'SELECT {PARAM};',
        'var': 'final_test'
    }]

    job_syntax_extended = JobSyntaxExtended(lines,
                                            {'macro_test_batch': macro_lines})
    assert_job_syntax_lines(job_syntax_extended.items(), expected)
    try:
        os.remove(SqlExecutor.csv_name('one_nine_ten'))
    except:
        pass
    with open(config_filename) as conf_file:
        job_controller = JobController(conf_file.name, endpoint_names,
                                       job_syntax_extended, {}, False)
    job_controller.run_job()
    res_batch_params = job_controller.variables[BATCH_PARAMS_KEY]
    assert res_batch_params == ['2', '3']
    sfvar = job_controller.variables['sfvar']
    assert len(sfvar) >= 15
    final_param = job_controller.variables['final_test']
    assert final_param == '3'
    del job_controller
    with open(SqlExecutor.csv_name('one_nine_ten')) as resulted_file:
        assert resulted_file.read() == 'f1,f9,f10\n1,9,10\n'