Esempio n. 1
0
    def flatten_subquery(self, final_list, sub_queries, level_num):

        for q in sub_queries:
            for alias, query in q.items():
                formatter = column_parser.Parser(query)
                formatted_query = formatter.format_query(query)
                unbundled = unbundle.Unbundle(formatted_query)
                query_dict = {}
                if unbundled.has_child(query):
                    if alias == 'no alias' or alias == '' or alias == 'query':
                        query_dict, sub_queries = unbundled.restructure_subquery(
                            query_dict, 'level_{}_main'.format(level_num),
                            formatted_query)
                    else:
                        query_dict, sub_queries = unbundled.restructure_subquery(
                            query_dict, alias, formatted_query)

            if query_dict != {}:
                final_list.append(query_dict)

            for subq in sub_queries:
                for _, sub_query in subq.items():
                    if not unbundled.has_child(sub_query):
                        final_list.append(subq)
                        sub_queries.remove(subq)

        return final_list, sub_queries
Esempio n. 2
0
    def delevel(self, query):
        formatter = column_parser.Parser(query)
        formatted_query = formatter.format_query(query)
        query_list = formatted_query.split('\n')

        main_query, copy_query_list = self.get_sub_query(query_list)
        main_query, sub_queries = self.separator(copy_query_list, main_query)

        return main_query, sub_queries
Esempio n. 3
0
    def parse_query(self, raw_query):

        formatter = column_parser.Parser(raw_query)
        formatted_query = formatter.format_query(raw_query)

        if 'WITH' in formatted_query:

            if formatted_query.startswith('WITH'):

                cte_dict = formatter.parse_cte(formatted_query)
                unbundled = unbundle.Unbundle(formatted_query)
                final_list = self.flatten_cte_nested(unbundled, cte_dict)

            else:
                sub_query_list = self.flatten_pure_nested(formatted_query)
                final_list = []
                for q in sub_query_list:

                    for alias, query in q.items():
                        if 'WITH' in query:
                            formatter = column_parser.Parser(query)
                            formatted_query = formatter.format_query(query)

                            cte_dict = formatter.parse_cte(formatted_query)
                            unbundled = unbundle.Unbundle(formatted_query)

                            cte_list = self.flatten_cte_nested(
                                unbundled, cte_dict)
                            final_list.append({alias: cte_list})

                        else:
                            final_list.append(q)

        else:
            final_list = self.flatten_pure_nested(raw_query)

        return final_list
def formatter(sample_query):
    formatter = column_parser.Parser(sample_query)
    return formatter
Esempio n. 5
0
def extract_subquery_fields(query, db_fields):
    formatter = column_parser.Parser(query)
    formatted = formatter.format_query(query)
    fields = formatter.match_queried_fields(formatted, db_fields)
    return fields
Esempio n. 6
0
def main(query):

    formatter = column_parser.Parser(query)
    formatted_query = formatter.format_query(query)
    query_list_0 = formatted_query.split('\n')
    query_dict = {}
    sub_query = delevel(query_list_0)
    query_dict = sub_query

    for alias, query in sub_query.items():

        formatter = column_parser.Parser(query)
        formatted_query = formatter.format_query(query)
        query_list = formatted_query.split('\n')

        if has_child(formatted_query) and alias != 'main':
            sub_query_dict = delevel(query_list)
            query_dict[alias] = sub_query_dict
            query_dict = clean_dict(query_dict)

            # for alias2, query2 in sub_query_dict.items():
            #     formatter2 = column_parser.Parser(query2)
            #     formatted_query2 = formatter2.format_query(query2)
            #     query_list2 = formatted_query2.split('\n')

            #     if has_child(formatted_query2) and alias2 != 'main':
            #         sub_query_dict2 = delevel(query_list2)
            #         sub_query_dict[alias2] = sub_query_dict2

            # for alias3, query3 in sub_query_dict2.items():
            #     formatter3 = column_parser.Parser(query3)
            #     formatted_query3 = formatter3.format_query(query3)
            #     query_list3 = formatted_query3.split('\n')

            #     try:
            #         if has_child(formatted_query3) and alias3 != 'main':
            #             sub_query_dict3 = delevel(query_list3)
            #             sub_query_dict2[alias3] = sub_query_dict3

            #             for alias4, query4 in sub_query_dict3.items():
            #                 formatter4 = column_parser.Parser(query4)
            #                 formatted_query4 = formatter4.format_query(query4)
            #                 query_list4 = formatted_query4.split('\n')

            #                 # try:
            #                 #     if has_child(formatted_query4) and alias4 != 'main':
            #                 #         sub_query_dict4 = delevel(query_list4)
            #                 #         sub_query_dict3[alias4] = sub_query_dict4
            #                 #     else:
            #                 #         pass
            #                 # except:
            #                 #     pass

            #             query_dict[alias][alias2][alias3] = sub_query_dict3
            #             query_dict = clean_dict(query_dict)
            #         else:
            #             pass
            #     except:
            #         pass

            #     query_dict[alias][alias2] = sub_query_dict2
            #     query_dict = clean_dict(query_dict)
            # else:
            #     pass
            # query_dict[alias] = sub_query_dict
            # query_dict = clean_dict(query_dict)
        else:
            pass

    return query_dict
Esempio n. 7
0
#                         else:
#                             l_path[k + '_' + k1 + '_' + k2] = v2
#                 else:
#                     l_path[k + '_' + k1] = v1

#         else:
#             l_path[k] = v
#     return l_path

if __name__ == '__main__':

    # query = open('query.sql').read()
    # query = open('long_query.sql').read()
    #### BUG: nested was not detected ####
    query = open('test_query.sql').read()
    formatter = column_parser.Parser(query)
    formatted_query = formatter.format_query(query)
    query_list = formatted_query.split('\n')

    if is_cte(formatted_query):
        cte_dict = formatter.parse_cte(formatted_query)
        final_dict = {}
        for alias, query in cte_dict.items():
            formatter = column_parser.Parser(query)
            formatted_query = formatter.format_query(query)
            try:
                final_dict[alias] = main(formatted_query)
            except:
                final_dict[alias] = formatted_query

        with open('data.json', 'w') as outfile:
Esempio n. 8
0
                                    if isinstance(sub_sub_query, str):
                                        sub_list.append(sub_sub_query)

                    preprocess_list.extend(sub_list)

    return preprocess_list


if __name__ == '__main__':

    t0 = time.perf_counter()
    raw_query = open('queries/{}.sql'.format(sys.argv[1])).read()

    # formatting and analyze the query structure
    formatter = column_parser.Parser(raw_query)
    formatted = formatter.format_query(raw_query)
    analyzer = query_analyzer.Analyzer(formatted)
    query_dict = analyzer.parse_query(formatted)

    # preprocess query breakdowns
    preprocess_list = unnest_query_list(query_dict)

    # getting metastore
    db_fields_1 = pd.DataFrame({
        'db_table':
        'sfdc.opportunity_product',
        'all_columns': [
            'actual_quantity_c', 'annual_list_price_value_c',
            'annual_product_value_c', 'annual_recurring_revenue_c',
            'contract_is_12_months_or_more_c', 'created_by_id', 'created_date',