Example #1
 def convert(matchobj):
     data_oid = int(matchobj.group(1))
     if matchobj.group(2) == "article":
         if data_oid in od:
             new_data_oid = od[data_oid]
         else:
             new_data_oid = 0
             log_me("No match for link: {}".format(matchobj.group(0)))
         return "<a data-oid=\"{0}\" data-otype=\"article\" href=\"/article/{0}/\"></a>".format(
             new_data_oid)
     elif matchobj.group(2) == "query":
         query_title = matchobj.group(4)
         if not queries.empty:
             q_match = queries.title == query_title
             if q_match.any():
                 matching_queries = queries[q_match]
                 # Several queries can share this title; use the most recent one
                 oid = matching_queries.iloc[-1].id  # -1 selects the last row
                 return "<a data-oid=\"{0}\" data-otype=\"query\" href=\"/query/{0}/\"></a>".format(
                     oid)
     elif matchobj.group(2) == "table":
         qual_name = matchobj.group(4).split()[0]
         tb = target.get_tables_by_name(qual_name)
         if not tb.empty:
             oid = tb.index[-1]
             return "<a data-oid=\"{0}\" data-otype=\"table\" href=\"/table/{0}/\"></a>".format(
                 oid)
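
convert() is designed as a callback for re.sub(), which passes in each match object. A minimal driver sketch follows; the pattern is a guess at the anchor markup (groups 1, 2, and 4 must line up with what convert() reads), not the original expression:

import re

# hypothetical pattern: group 1 = oid, group 2 = otype, group 4 = title/qualified name
anchor_re = re.compile(r'<a data-oid="(\d+)" data-otype="(\w+)"( title="([^"]*)")?[^>]*>')

new_body = anchor_re.sub(convert, old_body)  # old_body: the HTML of one article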
Example #2
 def send_statement(self, statement, final=False):
     cursor = None
     try:
         # log_me(statement)
         cursor = self.connection.cursor()
         if not cursor:
             raise DatabaseError("Connection error")
         cursor.execute(statement)
         self.connection.commit()
         return True
     except Exception as error:  # includes psycopg2's DatabaseError
         log_me(f"Error while executing {statement}:\n{error}")
         # Remember all the types that are not supported by Postgres
         # by counting each type name in dict missing
         pg_message = getattr(error, "pgerror", "") or str(error)
         match = re.search(r'type "(\w+)" does not exist', pg_message)
         if match:
             if match.group(1) in missing:
                 missing[match.group(1)] += 1
             else:
                 log_me(error)
                 missing[match.group(1)] = 1
         self.connection.rollback()
         return False
     finally:
         # always close the cursor; close the connection too after the final statement
         if cursor:
             cursor.close()
         if final:
             self.connection.close()
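
send_statement() leans on module-level scaffolding that is not shown here. A minimal sketch of those pieces, assuming psycopg2 (the pgerror attribute suggests it); the DSN and logger are stand-ins:

import re
import psycopg2
from psycopg2 import DatabaseError

missing = {}  # type name rejected by Postgres -> occurrence count

def log_me(msg):
    print(msg)

# self.connection is assumed to be a psycopg2 connection, e.g.
# self.connection = psycopg2.connect("dbname=target user=alation")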
Example #3
    def __init__(self, name, parent=None):
        name = name.strip()
        self.name = name  # this is the short name
        self.parent = parent  # this is the long name of the parent

        reg = f'{self}'  # the long name: formatting self calls __repr__, which builds it from the parent chain
        # The external ID includes the BI server ID and a random component;
        # it will be the key to the registry
        external_id = create_external_id(reg)
        if external_id not in Folder.registry:
            # ---- create BI folder -----------
            bif = [
                create_folder_object(name, external_id, parent_folder=parent)
            ]
            api = f'{bi_server_url}{bi_server}/folder/'
            # if parent:
            #     print(f'Creating "{parent}/{name}"')
            # else:
            #     print(f'Creating "{name}"')
            r = alation.generic_api_post(api, body=bif)
            if r.get('status') == 'successful':
                log_me(f'{external_id}:{r["result"]}')
            # ---- keep track of what was created ----
            Folder.registry[external_id] = self
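
The __init__ relies on class state and a __repr__ that yields the folder's long name. One plausible reading of that scaffolding (hedged; the real class may differ):

class Folder:
    registry = {}  # external_id -> Folder, shared by all instances

    def __repr__(self):
        # parent already holds the parent's long name, so prefixing it
        # yields this folder's long name, e.g. 'Sales/EMEA/Reports'
        return f'{self.parent}/{self.name}' if self.parent else self.name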
Example #4
 def check_sequence(self, first):
     # first is the ID of the first article (top of the hierarchy);
     # it will be the last to be created on the target.
     # We need to put the articles in a logical order: the first goes in
     # front, and we expect everything else to line up behind it by the time we are done.
     order = deque([first])
     # the to-do-list is all articles without the first
     to_do_list = deque(self.article.index)
     to_do_list.remove(first)  # we have taken care of the first already
     while to_do_list:
         # get the rightmost item
         last = order[-1]
         # we either remove a child or the next in the to-do list
         # do we have children?
         current_children = deque(self.article.children[last])
         while current_children:
             c = current_children.pop()
             try:
                 # move to the top of the to-do list
                 to_do_list.remove(c['id'])
                 to_do_list.appendleft(c['id'])
             except ValueError:  # c['id'] was not (or no longer) in the to-do list
                 log_me(
                     f"WARNING --- Article {c['id']}/{c['title']} does not appear to be loaded."
                 )
         order.append(to_do_list.popleft())  # next one
     return order
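
Stripped of the DataFrame plumbing, the ordering logic behaves like this toy version with plain dicts (ids invented):

from collections import deque

children = {1: [{'id': 2}, {'id': 3}], 2: [], 3: [{'id': 4}], 4: []}
order = deque([1])
to_do = deque([2, 3, 4])
while to_do:
    for c in reversed(children[order[-1]]):  # mirrors pop() from the right
        if c['id'] in to_do:
            to_do.remove(c['id'])
            to_do.appendleft(c['id'])
    order.append(to_do.popleft())
print(list(order))  # [1, 2, 3, 4] -- every parent precedes its children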
Example #5
 def convert_references(self):
     # First pass over all articles that are being migrated or referenced:
     # every reference to an article is "zero-ed out" and will be
     # recalculated in the second pass.
     # The title gets saved in the title attribute of the anchor (safer).
     for a in self.article.itertuples():
         soup = BeautifulSoup(a.body, "html5lib")
         # Find all anchors
         match = soup.find_all('a')
         for m in match:
             # We only care about Alation anchors, identified by the attr data-oid
             if 'data-oid' in m.attrs:
                 oid = m['data-oid']
                 otype = m['data-otype']
                 if otype == 'article':
                     try:
                         actual_title = self.article.at[int(oid), 'title']
                     except (KeyError, ValueError):
                         log_me(f"Warning! Ref to article not found "
                                f"{a.title}->{m.get_text()}")
                         actual_title = m.get_text()
                     m.string = actual_title
                 else:
                     actual_title = m.get_text()
                 # zero out the oid, drop the href, and keep the title on the anchor
                 m['data-oid'] = 0
                 del m['href']
                 m['title'] = actual_title
                 self.article.at[a.Index, 'body'] = soup.prettify()  # update the article body
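
In isolation, the anchor rewrite looks like this (self-contained sketch; the oid and markup are made up, and html5lib must be installed):

from bs4 import BeautifulSoup

body = '<p>See <a data-oid="42" data-otype="article" href="/article/42/">Old Title</a></p>'
soup = BeautifulSoup(body, 'html5lib')
a = soup.find('a')
a['data-oid'] = 0
del a['href']
a['title'] = a.get_text()
print(soup.prettify())  # oid zeroed, href gone, title preserved for the second pass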
Example #6
def update_datasource(alation_instance, ds_id, schemas, warnings=None):
    if warnings:
        # get all flags to see if we need to append our warnings
        flags_raw = alation_instance.generic_api_get(api=f"/integration/flag/?oid={ds_id}&otype=data", official=True)
        if flags_raw:
            existing_warning_text = ""
            existing_warning_id = None
            for flag in flags_raw:
                # there can be at most one warning flag
                if flag.get('flag_type') == 'WARNING':
                    existing_warning_text = flag.get('flag_reason')
                    existing_warning_id = flag.get('id')
            new_warning_text = existing_warning_text + "Missing tables: " + ", ".join(warnings)
            # There is a warning already -- just append and hope the admin will take action before the warning
            # gets too long to display in Alation
            if existing_warning_id:
                update_flag = alation_instance.generic_api_put(api=f"/integration/flag/{existing_warning_id}/",
                                                               body=dict(flag_reason=new_warning_text),
                                                               official=True)
            else:  # create a new warning flag
                new_flag = alation_instance.generic_api_post(api="/integration/flag/",
                                                             body=dict(flag_type="WARNING",
                                                                       subject=dict(id=int(ds_id), otype="data"),
                                                                       flag_reason=new_warning_text),
                                                             official=True)
        else:  # create the very first flag, namely the warning
            new_flag = alation_instance.generic_api_post(api="/integration/flag/",
                                                         body=dict(flag_type="WARNING",
                                                                   subject=dict(id=int(ds_id), otype="data"),
                                                                   flag_reason="Missing tables: " + ", ".join(warnings)),
                                                         official=True)

    """
    cron_extraction	Yes	The extraction schedule in crontab format (minute, hour, day of month, month of year, day of week)
    disable_auto_extraction	No	True if the extraction schedule should not be executed, false to run extraction according to cron_extraction
    limit_schemas	Yes	Schemas to include.
    exclude_schemas	Yes	Schemas to exclude.
    remove_filtered_schemas	Yes	Whether to remove filtered schemas.
    """
    params = dict(force_refresh=True)
    log_me("Running MDE")
    # Kick off schema discovery, then read back the schemas the connector can see
    alation_instance.generic_api_post(api=f'/data/{ds_id}/list_schemas/')
    alation_instance.generic_api_get(api=f'/integration/v1/datasource/{ds_id}/available_schemas/',
                                     params=params, official=True)
    body=dict(cron_extraction="{r} 0 * * *".format(r=random.randint(0, 59)),
              disable_auto_extraction=False,
              limit_schemas=[],
              exclude_schemas=['pg_temp_1', 'pg_toast', 'pg_toast_temp_1', 'public'],
              remove_filtered_schemas=True
              )
    sync = alation_instance.generic_api_put(api=f'/integration/v1/datasource/{ds_id}/sync_configuration/metadata_extraction/',
                                           body=body, official=True)
    mde = alation_instance.generic_api_post(api=f'/data/{ds_id}/extract_now/',
                                           params=params, official=False)
    return mde
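
The randomized minute staggers nightly extraction so that many datasources do not all start at exactly midnight. For example:

import random

minute = random.randint(0, 59)         # say, 37
cron = "{r} 0 * * *".format(r=minute)  # "37 0 * * *" -> run daily at 00:37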
Example #7
 def modify_attribute(attr):
     for pattern, replacement in substitutions.items():
         match = re.search(pattern, attr, flags=re.IGNORECASE)
         if match:
             # Remember the substitution and print it for debugging
             if match.group(0) not in seen:
                 log_me(f'{match.group(0)} -> {replacement}')
                 seen[match.group(0)] = True
             attr = replacement
             break
     return attr
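
With a small substitutions map like the one in Example #14, the function behaves as follows (sketch; substitutions and seen are the module-level globals it expects):

import re

substitutions = {r'^string': 'text', r'^(small)?datetime': 'timestamp'}
seen = {}  # substitutions already logged

print(modify_attribute('STRING(255)'))    # 'text' (the match is case-insensitive)
print(modify_attribute('smalldatetime'))  # 'timestamp'
print(modify_attribute('integer'))        # unchanged: no pattern matches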
Example #8
 def bulk_api_body(self):
     log_me("Creating Body for Bulk API")
     body = ""
     # Iterate through all the articles
     for _, article in self.article.iterrows():
         new_row = dict(description=article['body'], key=article['title'])
         # Iterate through the custom fields (caller could have sent fewer)
         for field in article['custom_fields']:
             if field['value_type'] in ['picker', 'multi_picker', 'date', 'rich_text']:
                 new_row[field['field_name']] = field['value']
             else:
                 # In the case of Object Sets and People Sets, this may not be any good
                 log_me(
                     f"Warning: {field['field_name']}/{field['value_type']}/{field['value']}"
                 )
                 new_row[field['field_name']] = {
                     field['value_type']: field['value']
                 }
         body += json.dumps(new_row) + '\n'
     return body
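
The result is newline-delimited JSON, one document per article, presumably because the bulk API consumes one JSON object per line. Two illustrative lines (field names invented):

{"description": "<p>Body...</p>", "key": "My Article", "Status": "Approved"}
{"description": "<p>Body...</p>", "key": "Other Article", "Status": "Draft"}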
Example #9
    #dimension_articles_ = dimension_articles_.apply(target.postArticle, axis=1)

    # For one of the dimensions, "View", we will create an article as the parent for all the articles that use
    # that "View"

    c_fields = target.put_custom_fields(custom_fields_pd)
    print(c_fields)
    # returns a list of field IDs (existing or new)
    target.put_custom_template(file_key, c_fields)

    n = 100
    s = dtv.shape[0]
    j = math.floor(s / n) + 1  # how many blocks of 100?

    for b in range(j):
        log_me("Starting block {} of {} - total {}".format(b, n, s))
        body = ""
        for i in range(n):
            if i + b * n >= s:
                break
            art = dtv.iloc[i + b * n]
            art_not_na = art[art.notna()]
            art_as_dict = dict(art_not_na)
            new = {}
            table_in_body = []
            for k, v in art_as_dict.items():
                # create a row in an HTML table for these values
                table_in_body.append(add_table_row(k, v))

                # If the field is a picker, let's populate the field value
                if k in pickers and pickers[k] > 1:
Example #10
    dd = pickle_cont['dd']
    allArticles = pickle_cont['article']
    queries = pickle_cont['queries']
    allTemplates = pickle_cont['template']
    custom_fields = pickle_cont['custom_fields']

    # --- Log into the target instance
    url_2 = args['host']
    user_2 = args['username']
    passwd_2 = args['password']
    delete_flag = args['delete']
    target = AlationInstance(url_2, user_2, passwd_2)
    if delete_flag:
        a = target.get_articles(template=desired_template)
        log_me('Deleting existing articles: {}'.format(a.id))
        a.id.apply(target.del_article)

    Art = Article(allArticles)  # convert to Article class

    templates = target.get_templates()
    template_id = int(templates.loc[templates.title == desired_template, 'id'].iloc[0])

    target.put_queries(queries=queries)
    queries = target.get_queries()  # re-download so we can figure out the new query IDs

    order = check_sequence(allArticles, first=1889)
    dummy = target.post_article(
        dict(title="dummy {}".format(
            time.strftime(u"%Y-%b-%d %H:%M:%S", time.localtime())),
             body='Delete this afterwards'))
Example #11
    # Rename certain columns
    df.rename(columns=mapper, inplace=True)

    # Only keep the columns mentioned in the mapper
    cols = list(mapper.values())
    df = df.loc[:, cols]

    # convert dataframe into JSON rows format
    jsr = "\n".join(list(df.apply(json_row, axis=1)))

    desired_template = config.desired_template

    alation_1.put_articles_2(jsr, desired_template)

    log_me("Getting desired articles")
    articles = alation_1.get_articles(template=desired_template)  # download all articles
    Art = Article(articles)  # convert to Article class

    # First pass of fixing references
    # Art.convert_references()

    # download all templates (with their custom fields)
    templates = alation_1.get_templates()
    # this way we also get the template info
    custom_fields = alation_1.get_custom_fields_from_template(desired_template)

    # Next, we put the objects we want. We need to start with the custom fields, then the template,
    # then the articles, and finally the glossaries.
Example #12
random_users_2 = [dict(otype='user', oid=u) for u in list(users.id)]
# Create a BI Server, by passing a list of 1 URI

bi_server_details = [{
    "uri": "https://alation.looker.com/browse",
    "title": f"My BI Server {file_key}"
}]

bi_server_url = '/integration/v2/bi/server/'
# bi_server will be populated properly by this...
r = alation.generic_api_post(api=bi_server_url, body=bi_server_details)
# {'Status': 'Success: Created 1 servers.', 'Count': 1, 'Errors': [None], 'Server IDs': [48]}
if r['Count'] == 1:
    bi_server = r['Server IDs'][0]
    #alation.update_custom_field(o_type='bi_server', o_id=bi_server, field_id=3, update=file_key)
    log_me(f'Created server {file_key}: {base_url}/bi/v2/server/{bi_server}/')
else:
    log_me(f"Expected one BI Server to be created: {r}")

# bi_server = 2

# =========== Handling of the input file from Customer, containing reports in folders ===============
report_df = pd.read_csv('~/Downloads/mysql-analytics_run_1_stmt_1_0 (3).csv',
                        sep=',')
report_df.index = report_df.id


# uses the globals file_key and bi_server; not clean
def create_external_id(folder):
    if folder:
        return f'{file_key}+{bi_server}+{folder}'
Example #13
 def create_pdf(self, first, additional_html=''):
     now = datetime.datetime.now()
     # Use pdfkit to create final ABOK pdf file
     # Options for PDFKit (wkhtmltopdf really) to generate the pdf - https://wkhtmltopdf.org/usage/wkhtmltopdf.txt
     bodyoptions = {
         'page-size': 'Letter',
         'footer-line': '',
         'footer-center': 'For use only by Alation customers.  No duplication or transmission without permission.',
         'footer-font-size': '9',
         #'disable-internal-links': True,
         #'disable-external-links': True,
         'dpi': '300',
         'minimum-font-size': '12',
         'disable-smart-shrinking': '',
         'header-left': 'Alation Book of Knowledge' + now.strftime(" %Y-%m-%d %H:%M "),
         'header-line': '',
         'header-font-size': '9',
         'header-spacing': '4',
         'margin-bottom': '15',
         'margin-top': '15',
         'footer-spacing': '4',
         'margin-left': '10',
         'margin-right': '10',
         'footer-right': '[page]/[toPage]',
         'enable-toc-back-links': '',
         'outline': '',
         'quiet': ''
     }
     # Define the location of the created ABOK pdf file
     ABOKpdffilename = 'ABOK' + now.strftime(" %Y-%b-%d %H_%M ") + '.pdf'
     seq = self.check_sequence(first)
     html = ('<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
             '<link rel="stylesheet" href="https://use.typekit.net/pno7yrt.css">'
             '<link href="alation.css" rel="stylesheet" type="text/css">')
     for i in seq:
         html += '<h1>' + self.article.title[i] + '</h1>'
         html += self.article.body[i]
     html2 = abok.clean_up(html)
     html2 = html2 + additional_html
     pdfkit.from_string(html2,
                        ABOKpdffilename,
                        options=bodyoptions,
                        css="alation.css",
                        cover='cover.html',
                        cover_first=True)
     log_me('pdfkit finished processing')
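
A hypothetical invocation, assuming the surrounding Article class and the root article id used elsewhere in these examples:

book = Article(articles)  # articles as loaded from the pickle
book.create_pdf(first=1889, additional_html='<h1>Appendix</h1>')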
Example #14
        # single quotes must be doubled inside a SQL string literal
        safe_text = comment_text.replace("'", "''")
        if part2:
            statement = f"""COMMENT ON {otype} "{part1}"."{part2}" IS '{safe_text}'"""
        else:
            statement = f"""COMMENT ON {otype} "{part1}" IS '{safe_text}'"""
        self.send_statement(statement)


    def get_existing_schemas(self):
        return pd.read_sql("SELECT catalog_name, schema_name, schema_owner FROM information_schema.schemata",
                           con=self.connection)


# Main program
if __name__ == "__main__":
    desc = "Copies physical metadata from rosemeta to another pgSQL"
    log_me("Reading data from pickle file")
    data = pd.read_pickle("rosemeta.gzip")
    # Replace non-postgres types with equivalent postgres types
    substitutions = {
        r'^(small)?datetime': 'timestamp',
        r'^timestamp_ltz': 'timestamp with time zone',
        r'^(timestamp_ntz|smalltimestamp)': 'timestamp',
        r'^string': 'text',
        r'^text\(max\)': 'text',
        r'^(long|medium|short)?text(\(\d+\))?': 'text',
        r'^(number|double|float|numeric)(\(\d+\))?': 'numeric',
        # first match wins in modify_attribute, so one broad pattern
        # covers the formerly overlapping integer patterns
        r'^(big|small)?integer(\(\d+\)|\d)?': 'integer',
        r'^(big|small)?_integer(\d)?': 'integer[]',
        # int with (digit)
Example #15
random_users_2 = [dict(otype='user', oid=u) for u in list(users.id)]
# Create a BI Server, by passing a list of 1 URI

bi_server_details = [{"uri": "https://alation.looker.com/browse"}]

bi_server_url = '/integration/v2/bi/server/'
# bi_server will be populated properly by this...
r = alation.generic_api_post(api=bi_server_url, body=bi_server_details)
# {'Status': 'Success: Created 1 servers.', 'Count': 1, 'Errors': [None], 'Server IDs': [48]}
if r['Count'] == 1:
    bi_server = r['Server IDs'][0]
    alation.update_custom_field(o_type='bi_server',
                                o_id=bi_server,
                                field_id=3,
                                update=file_key)
    log_me(f'Created server {file_key}: {base_url}/bi/v2/server/{bi_server}/')
else:
    log_me(f"Expected one BI Server to be created: {r}")

#bi_server = 123

# =========== Handling of the input file from Customer, containing reports in folders ===============
report_df = pd.read_csv('reports_full.csv', sep=';')
report_df.index = report_df.ID


# uses the globals file_key and bi_server; not clean
def create_external_id(folder):
    if folder:
        return f'{file_key}+{bi_server}+{folder}'
    # else it is the root folder, which does not need an external ID
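
For illustration, with hypothetical globals in place:

file_key, bi_server = 'reports_full', 48
print(create_external_id('Sales/EMEA'))  # 'reports_full+48+Sales/EMEA'
print(create_external_id(''))            # None: the root folder needs no external ID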
Example #16
            official=True)
        tables = tables.decode().split('\n')
        elems = []
        for t in tables:
            if len(t) > 1:
                elems.append(json.loads(t))
        tables_pd = pd.DataFrame(elems)
        tables_pd.index = tables_pd.key
        del tables_pd['key']
        return tables_pd

    df1 = get_values('steward')
    df2 = get_values('some multi')
    df3 = df1.merge(df2, left_index=True, right_index=True)

    log_me("Getting desired articles")
    articles = alation_1.get_articles(template=desired_template)  # download all articles
    Art = Article(articles)  # convert to Article class
    #queries = alation_1.get_queries()

    # First pass of fixing references
    #target.put_queries(queries=queries)
    # Art.convert_references()

    log_me("Getting media files via download")
    list_of_files = list(Art.get_files())
    alation_1.get_media_file(list_of_files, config.base_path)
    extract_files(config.base_path)

    log_me("Creating PDF")
Example #17
        p = pickle.Unpickler(mypickle)
        pickle_cont = p.load()
    # extract data dictionary, articles, queries, templates, custom fields from the pickle
    dd = pickle_cont['dd']
    articles = pickle_cont['article']
    queries = pickle_cont['queries']

    # --- Log into the target instance
    target = AlationInstance(config.args['host'], config.args['username'],
                             config.args['password'])
    # -- Make sure ABOK Article template is created
    template_id = target.put_custom_template('ABOK Article')
    # If desired, delete all pre-existing ABOK articles.
    if config.args['delete']:
        a = target.get_articles(template=config.desired_template)
        log_me('Deleting existing articles: {}'.format(a.id))
        a.id.apply(target.del_article)

    # Upload all queries to the instance. Note we implicitly assume here that the only
    # references are to existing objects, e.g. AA tables
    target.put_queries(queries=queries)
    queries = target.get_queries()  # re-download so we can figure out the new query IDs

    # to-do: check sequence before pickling! Then we can simplify this code even more
    order = check_sequence(
        articles, first=config.first_abok_article)  # order is list of IDs
    n = len(order)

    offset = use_dummy_to_get_highest_id() + 1
    # if the order were set beforehand, the mapping dict would no longer be
    # required; we would just add the offset
    mapping_dict = {}
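
use_dummy_to_get_highest_id() is not shown here; a plausible sketch, based on the dummy-article trick in Example #10 (the response shape is an assumption):

def use_dummy_to_get_highest_id():
    # post a throw-away article; its id is the highest on the target instance
    dummy = target.post_article(dict(title='dummy', body='Delete this afterwards'))
    return dummy['id']  # hypothetical response field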