Example #1
    def newTags(self):
        """
    Identify if some tags are completely new.
    """
        if len(self.new_minus_old) == 0:
            return

        #msg = []
        #msg.append("The following tags are completely new:")
        #msg = msg + sorted(list(self.new_minus_old))
        #msg = "\n".join(msg)
        #logger.info(msg)
        #logger.info("")

        logger.info("Found %i new tag(s)" % len(self.new_minus_old))
        logger.info("")

        new_processed = set()
        for ni in self.new_minus_old:
            confirm_msg = colored('Did you add the tag "%s"? yes/[no] ' % ni,
                                  'cyan')
            confirm_new = input(confirm_msg)
            if confirm_new.lower() in ['y', 'yes']:
                mig_i = ('touch', None, ni)
                self.migrations.append(mig_i)
                new_processed.add(ni)

        # remove all processed
        self.new_minus_old -= new_processed
Example #2
    def connect(self):
        logger.info("Connecting to redis cache")
        logger.debug(self.redis_args)
        import pyarrow as pa

        self.redis_client = redis.Redis(**self.redis_args)
        self.pyarrow_context = pa.default_serialization_context()
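For context, a minimal round-trip sketch of how such a client could be used, assuming a local redis on the default port and an older pyarrow (the SerializationContext API was deprecated in pyarrow 2.0 and later removed):

import pandas as pd
import pyarrow as pa
import redis

r = redis.Redis(host='localhost', port=6379)  # assumed connection args
context = pa.default_serialization_context()

# serialize a dataframe into redis, then read it back
df = pd.DataFrame({'instance_id': ['i-123'], 'Name': ['web-1']})
r.set('tags_df', context.serialize(df).to_buffer().to_pybytes())
df_back = context.deserialize(r.get('tags_df'))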
Example #3
    def droppedTags(self):
        """
    Identify if some tags are completely dropped.
    Ask the user if indeed dropped, or accident.
    Follows the idea of django/db/migrations/questioner.py
    where django asks if fields are renamed or dropped
    https://github.com/django/django/blob/e90af8bad44341cf8ebd469dac57b61a95667c1d/django/db/migrations/questioner.py
    """
        if len(self.old_minus_new) == 0:
            return

        #msg = []
        #msg.append("The following tags are no longer present:")
        #msg = msg + sorted(list(self.old_minus_new))
        #msg = "\n".join(msg)
        #logger.info(msg)
        #logger.info("")

        logger.info("There are %i deleted tag(s)" % len(self.old_minus_new))
        logger.info("")

        old_processed = set()
        for ni in self.old_minus_new:
            confirm_msg = colored(
                'Did you completely delete the tag "%s"? yes/[no] ' % ni,
                'cyan')
            confirm_del = input(confirm_msg)
            if confirm_del.lower() in ['y', 'yes']:
                mig_i = ('rm', ni, None)
                self.migrations.append(mig_i)
                old_processed.add(ni)

        # remove all processed
        self.old_minus_new -= old_processed
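Taken together with Example #1, the migrations list appears to hold (operation, old_tag, new_tag) tuples: ('touch', None, ni) for a newly added tag and ('rm', ni, None) for a deleted one. A hypothetical consumer sketch (the dispatch below is an assumption for illustration, not the project's actual code):

migrations = [('touch', None, 'app'),   # 'app' is a brand-new tag
              ('rm', 'owner', None)]    # 'owner' was deliberately deleted
for op, old_tag, new_tag in migrations:
    if op == 'touch':
        print("create tag %s" % new_tag)
    elif op == 'rm':
        print("drop tag %s" % old_tag)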
Example #4
    def is_configured(self):
        from isitfit.utils import ping_matomo

        # check not None and not empty string
        if os.getenv('DATADOG_API_KEY', None):
            if os.getenv('DATADOG_APP_KEY', None):
                if self.print_configured:
                    logger.info("Datadog env vars available")
                    ping_matomo("/cost/setting?datadog.is_configured=True")
                    self.print_configured = False
                return True

        if self.print_configured:
            logger.info(
                "Datadog env vars missing. Set DATADOG_API_KEY and DATADOG_APP_KEY to get memory data from Datadog."
            )
            ping_matomo("/cost/setting?datadog.is_configured=False")

            import click
            display_msg = lambda x: click.secho(x, fg='yellow')
            display_msg(
                "Note: without the datadog integration, memory metrics are missing, thus only CPU is used, which is not representative for memory-bound applications."
            )
            display_msg(
                "If you gather memory metrics using another provider than datadog, please get in touch at https://www.autofitcloud.com/contact"
            )
            self.print_configured = False

        return False
Example #5
    def tag_set(self):
        logger.info("Step 3: Generate a set of tags")

        # merge with original names
        df_ori = pd.DataFrame({
            'original': self.names_original,
            'lower': self.names_lower,
            'instance_id': self.names_df.instance_id
        })
        df_ori['tag_set'] = None

        import re

        def myfind(t, n):
            # https://stackoverflow.com/a/48205793/4126114
            f = re.findall(r"\b%s\b" % t, n, re.IGNORECASE)
            return len(f) > 0

        self.len_ori = df_ori.shape[0]
        for i in range(self.len_ori):
            name_value = df_ori.iloc[i].lower
            # keep the (up to) 3 most frequent tags whose words appear in the name
            i_sub = [
                myfind(tag_value, name_value)
                for tag_value in self.df_freq_all.word_combined.values
            ]
            df_sub = self.df_freq_all[i_sub]
            df_sub = df_sub.sort_values('n', ascending=False)
            df_sub = df_sub.iloc[:3]
            df_ori.at[i, 'tag_set'] = set(df_sub.word_combined.values)

        self.df_ori = df_ori
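A quick demonstration of the word-boundary matching that myfind performs (note it assumes tag values contain no regex metacharacters; wrapping t in re.escape would make it robust):

import re

def myfind(t, n):
    # whole-word, case-insensitive search of tag t inside name n
    return len(re.findall(r"\b%s\b" % t, n, re.IGNORECASE)) > 0

myfind('app', 'app-server-1')  # True: the hyphen is a word boundary
myfind('app', 'myapp-server')  # False: 'app' is embedded in 'myapp'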
Example #6
    def count(self):
        # method 1
        # ec2_it = self.ec2_resource.instances.all()
        # return len(list(ec2_it))

        if self.n_entry is not None:
            return self.n_entry

        self.n_entry = len(list(self.iterate_core(True)))

        # interim result for timer data to calculate performance (seconds per ec2 or seconds per rds)
        from isitfit.utils import ping_matomo
        ping_matomo(
            "/cost/base_iterator/BaseIterator/count?service=%s&n_entry=%s&n_region=%s"
            % (self.service_name, self.n_entry, len(self.region_include)))

        # send message to logs for info
        if self.n_entry == 0 and len(self.region_include) == 0:
            msg_count = "Found no %s"
            logger.info(msg_count % (self.service_description))
        else:
            msg_count = "Found a total of %i %s in %i region(s) (other regions do not hold any %s)"
            logger.info(msg_count %
                        (self.n_entry, self.service_description,
                         len(self.region_include), self.service_name))

        return self.n_entry
Example #7
    def fetch(self):
        logger.debug("TagsSuggestBasic::fetch")
        logger.info("Counting EC2 instances")
        n_ec2_total = len(list(self.ec2_resource.instances.all()))
        msg_total = "Found a total of %i EC2 instances" % n_ec2_total
        if n_ec2_total == 0:
            from isitfit.cli.click_descendents import IsitfitCliError
            raise IsitfitCliError(msg_total, self.ctx)

        logger.warning(msg_total)

        self.tags_list = []
        from tqdm import tqdm
        desc = "Scanning EC2 instances"
        ec2_all = self.ec2_resource.instances.all()
        for ec2_obj in tqdm(ec2_all, total=n_ec2_total, desc=desc, initial=1):
            if ec2_obj.tags is None:
                tags_dict = {}
            else:
                tags_dict = self.tags_to_dict(ec2_obj)

            tags_dict['instance_id'] = ec2_obj.instance_id
            self.tags_list.append(tags_dict)

        # convert to pandas dataframe when done
        self.tags_df = self._list_to_df()
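tags_to_dict is not shown in this snippet; a minimal sketch of what it presumably does, assuming the usual boto3 shape of ec2_obj.tags, i.e. a list of {'Key': ..., 'Value': ...} dicts:

def tags_to_dict(ec2_obj):
    # [{'Key': 'Name', 'Value': 'web-1'}, ...] -> {'Name': 'web-1', ...}
    return {t['Key']: t['Value'] for t in ec2_obj.tags}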
Example #8
def analyze(ctx, filter_tags, save_details):
    # gather anonymous usage statistics
    ping_matomo("/cost/analyze?filter_tags=%s&save_details=%s" %
                (filter_tags, b2l(save_details)))

    # save to click context
    share_email = ctx.obj.get('share_email', [])

    #logger.info("Is it fit?")
    logger.info("Initializing...")

    # set up pipelines for ec2, redshift, and aggregator
    from isitfit.cost import ec2_cost_analyze, redshift_cost_analyze, account_cost_analyze
    mm_eca = ec2_cost_analyze(ctx, filter_tags, save_details)
    mm_rca = redshift_cost_analyze(share_email,
                                   filter_region=ctx.obj['filter_region'],
                                   ctx=ctx,
                                   filter_tags=filter_tags)

    # combine the 2 pipelines into a new pipeline
    mm_all = account_cost_analyze(mm_eca, mm_rca, ctx, share_email)

    # configure tqdm
    from isitfit.tqdmman import TqdmL2Quiet
    tqdml2 = TqdmL2Quiet(ctx)

    # Run pipeline
    mm_all.get_ifi(tqdml2)
Example #9
def optimize(ctx, n, filter_tags, allow_ec2_different_family):
    # gather anonymous usage statistics
    ping_matomo(
        "/cost/optimize?n=%i&filter_tags=%s&allow_ec2_different_family=%s" %
        (n, filter_tags, b2l(allow_ec2_different_family)))

    # save to context
    share_email = ctx.obj.get('share_email', [])
    ctx.obj['allow_ec2_different_family'] = allow_ec2_different_family

    #logger.info("Is it fit?")
    logger.info("Initializing...")

    from isitfit.cost import ec2_cost_optimize, redshift_cost_optimize, account_cost_optimize
    mm_eco = ec2_cost_optimize(ctx, n, filter_tags)
    mm_rco = redshift_cost_optimize(filter_region=ctx.obj['filter_region'],
                                    ctx=ctx,
                                    filter_tags=filter_tags)

    # merge and run pipelines
    mm_all = account_cost_optimize(mm_eco, mm_rco, ctx)

    # configure tqdm
    from isitfit.tqdmman import TqdmL2Quiet
    tqdml2 = TqdmL2Quiet(ctx)

    # Run pipeline
    mm_all.get_ifi(tqdml2)
Example #10
    def pullLatest(self):
        logger.info("Pulling latest tags for comparison")
        from .tagsDump import TagsDump
        td = TagsDump(self.ctx)
        td.fetch()
        td.suggest()  # not really suggesting, just dumping to csv
        self.latest_df = td.tags_df
        self.latest_df = self.latest_df.fillna(value='')
Example #11
    def suggest(self):
        logger.debug("TagsSuggestBasic::suggest")
        logger.info("Generating suggested tags")
        from .tagsImplier import TagsImplierMain
        tags_implier = TagsImplierMain(self.tags_df)
        self.suggested_df = tags_implier.imply()
        self.csv_fn = dump_df_to_csv(self.suggested_df,
                                     'isitfit-tags-suggestBasic-')
        self.suggested_shape = self.suggested_df.shape
Example #12
    def _list_to_df(self):
        logger.info("Converting tags list into dataframe")
        import pandas as pd
        df = pd.DataFrame(self.tags_list)
        df = df.rename(columns={
            'instance_id': '_0_instance_id',
            'Name': '_1_Name'
        })  # trick to keep instance ID and name as the first columns
        df = df.sort_index(axis=1)  # sort columns
        df = df.rename(columns={
            '_0_instance_id': 'instance_id',
            '_1_Name': 'Name'
        })  # undo trick
        return df
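The rename/sort/rename trick relies on '_0' and '_1' sorting before alphabetic column names; a self-contained illustration:

import pandas as pd

df = pd.DataFrame([{'instance_id': 'i-1', 'Name': 'web', 'env': 'prod', 'app': 'api'}])
df = df.rename(columns={'instance_id': '_0_instance_id', 'Name': '_1_Name'})
df = df.sort_index(axis=1)  # '_' sorts before letters: _0_..., _1_..., app, env
df = df.rename(columns={'_0_instance_id': 'instance_id', '_1_Name': 'Name'})
list(df.columns)  # ['instance_id', 'Name', 'app', 'env']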
Example #13
def dump_df_to_csv(df_dump, csv_prefix):
    import tempfile
    import pandas as pd

    # https://pypi.org/project/termcolor/
    from termcolor import colored
    from isitfit.dotMan import DotMan
    with tempfile.NamedTemporaryFile(prefix=csv_prefix,
                                     suffix='.csv',
                                     delete=False,
                                     dir=DotMan().tempdir()) as fh:
        logger.info(colored("Dumping data into %s" % fh.name, "cyan"))
        df_dump.to_csv(fh.name, index=False)
        return fh.name
Example #14
    def _tags_suggest(self):
        logger.info("Requesting tag suggestions from isitfit server")

        load_send = {}
        #load_send.update(self.api_man.r_sts)
        load_send['s3_key_suffix'] = self.s3_key_suffix
        load_send['sqs_url'] = self.api_man.r_body['sqs_url']

        # curl equivalent
        # curl -X POST --data "foo=bar" https://api.isitfit.io/v0/974668457921/AIDA6F3WEM7AXY6Y4VWDC/tags/suggest
        r2, dt_now = self.api_man.request(method='post',
                                          relative_url='./tags/suggest',
                                          payload_json=load_send,
                                          authenticated_user_path=True)

        return r2, dt_now
Example #15
    def after_all(self, context_all):
        # unpack
        # Update 2019-12-17 ec2_noCloudwatch is deprecated
        # ec2_noCloudwatch, ec2_noCloudtrail = context_all['ec2_noCloudwatch'], context_all['ec2_noCloudtrail']
        ec2_noCloudtrail = context_all['ec2_noCloudtrail']

        # get now + 10 minutes
        # http://stackoverflow.com/questions/6205442/ddg#6205529
        import datetime as dt
        dt_now = dt.datetime.now()
        TRY_IN = 10
        now_plus_10 = dt_now + dt.timedelta(minutes=TRY_IN)
        now_plus_10 = now_plus_10.strftime("%H:%M")

#        if len(ec2_noCloudwatch)>0:
#          n_no_cw = len(ec2_noCloudwatch)
#          has_more_cw = "..." if n_no_cw>5 else ""
#          l_no_cw = ", ".join(ec2_noCloudwatch[:5])
#          logger.info("No cloudwatch data for %i resources: %s%s"%(n_no_cw, l_no_cw, has_more_cw))
#          logger.info("Try again in %i minutes (at %s) to check for new data"%(TRY_IN, now_plus_10))
#          logger.info("")

        if len(ec2_noCloudtrail) > 0:
            n_no_ct = len(ec2_noCloudtrail)
            has_more_ct = "..." if n_no_ct > 5 else ""
            l_no_ct = ", ".join(ec2_noCloudtrail[:5])
            logger.info("No cloudtrail data for %i resources: %s%s" % (n_no_ct, l_no_ct, has_more_ct))
            logger.info("Try again in %i minutes (at %s) to check for new data" % (TRY_IN, now_plus_10))
            logger.info("")

        return context_all
Example #16
    def freq_list(self):
        logger.info("Step 1: calculate word frequencies")
        # lower-case
        self.names_lower = [x.lower() for x in self.names_original]

        # count single word frequencies
        # https://programminghistorian.org/en/lessons/counting-frequencies
        import re
        #names_split = (' '.join(names_original)).split(' ')
        #words = re.findall(r"\w+", "the quick person did not realize his speed and the quick person bumped")
        names_split = re.findall(r"\w+", ' '.join(self.names_lower))

        # Counting bigrams
        # https://stackoverflow.com/a/12488794/4126114
        from itertools import tee, islice
        from collections import Counter

        def ngrams(lst, n):
            tlst = lst
            while True:
                a, b = tee(tlst)
                l = tuple(islice(a, n))
                if len(l) == n:
                    yield l
                    next(b)
                    tlst = b
                else:
                    break

        def get_freq(n):
            # names_freq = dict(Counter(zip(names_split, islice(names_split, n-1, None))))
            names_freq = dict(Counter(ngrams(names_split, n)))
            names_freq = [(k, v) for k, v in names_freq.items()]
            return names_freq

        self.freq_1w = get_freq(1)
        self.freq_2w = get_freq(2)
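The ngrams generator pairs tee and islice to slide a window over the word list, one word at a time; a tiny worked run of the same function:

from collections import Counter
from itertools import islice, tee

def ngrams(lst, n):
    tlst = lst
    while True:
        a, b = tee(tlst)
        l = tuple(islice(a, n))
        if len(l) == n:
            yield l
            next(b)  # advance the window by one word
            tlst = b
        else:
            break

words = 'the quick person did not realize the quick person bumped'.split()
dict(Counter(ngrams(words, 2)))
# {('the', 'quick'): 2, ('quick', 'person'): 2, ('person', 'did'): 1, ...}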
Example #17
    def freq_df(self):
        logger.info("Step 2: convert word frequencies to pandas dataframe")

        # convert to pandas dataframe

        def min_is_2(df_in):
            return df_in[df_in.n >= 2]  # minimum occurrence is 2

        def freq2df(freq_in, l):
            df_freq_in = pd.DataFrame(freq_in, columns=['word_tuple', 'n'])
            df_freq_in = min_is_2(df_freq_in)
            df_freq_in['l'] = l
            return df_freq_in

        df_freq_1w = freq2df(self.freq_1w, 1)
        df_freq_1w['word_1'] = df_freq_1w.word_tuple.apply(lambda x: x[0])
        df_freq_1w['word_2'] = None
        df_freq_2w = freq2df(self.freq_2w, 2)
        df_freq_2w['word_1'] = df_freq_2w.word_tuple.apply(lambda x: x[0])
        df_freq_2w['word_2'] = df_freq_2w.word_tuple.apply(lambda x: x[1])

        # print("##########")
        # print("before filter")
        # print("1w")
        # print(df_freq_1w)
        # print("")
        # print("2w")
        # print(df_freq_2w)
        # print("##########")

        # filter out 1-grams if their 2-gram counterpart is superior
        df_freq_2w = df_freq_2w.merge(df_freq_1w[['word_1', 'n']],
                                      how='left',
                                      left_on='word_1',
                                      right_on='word_1',
                                      suffixes=['', '.1w=2w.word1'])
        df_freq_2w = df_freq_2w.merge(df_freq_1w[['word_1', 'n']],
                                      how='left',
                                      left_on='word_2',
                                      right_on='word_1',
                                      suffixes=['', '.1w=2w.word2'])
        df_freq_2w = df_freq_2w.drop(columns=['word_1.1w=2w.word2'])
        df_freq_2w = df_freq_2w[(df_freq_2w.n >= df_freq_2w['n.1w=2w.word1']) &
                                (df_freq_2w.n >= df_freq_2w['n.1w=2w.word2'])]

        # print("")
        # print("after filtering 2w")
        # print(df_freq_2w)

        # drop from 1w the components that were used in the 2w
        df_freq_1w = df_freq_1w[~(df_freq_1w.word_1.isin(df_freq_2w.word_1)
                                  | df_freq_1w.word_1.isin(df_freq_2w.word_2))]

        # drop columns
        df_freq_2w = df_freq_2w.drop(
            columns=['n.1w=2w.word1', 'n.1w=2w.word2'])

        # concatenate into 1 df
        df_freq_all = pd.concat([df_freq_1w, df_freq_2w], axis=0)
        df_freq_all['word_combined'] = df_freq_all.apply(
            lambda r: r.word_1
            if r.word_2 is None else r.word_1 + ' ' + r.word_2,
            axis=1)

        #print("")
        #print("final df")
        #print(df_freq_all)
        self.df_freq_all = df_freq_all
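The intent of the 2-gram filter: a 2-gram survives only if it is at least as frequent as each of its component words, and the surviving components are then removed from the 1-gram list. A stripped-down sketch of the same idea, checking only the first word for brevity (the code above checks both):

import pandas as pd

# hypothetical frequencies: "web" only ever appears inside "web server"
df_1w = pd.DataFrame({'word_1': ['web', 'api'], 'n': [5, 7]})
df_2w = pd.DataFrame({'word_1': ['web'], 'word_2': ['server'], 'n': [5]})

m = df_2w.merge(df_1w, on='word_1', suffixes=['', '_1w'])
df_2w = m[m.n >= m.n_1w].drop(columns=['n_1w'])  # 'web server' kept (5 >= 5)
df_1w = df_1w[~df_1w.word_1.isin(df_2w.word_1)]  # 'web' dropped, 'api' kept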
Example #18
    def tag_list(self):
        logger.info("Step 4: convert the set of tags to a list of tags")

        df_ori = self.df_ori

        # initialize
        # just doing [[None]*3]*len_ori doesn't work
        df_ori['tag_list'] = None
        for i1 in range(self.len_ori):
            df_ori.at[i1, 'tag_list'] = [None] * 3

        # distributing the tag_set to tag_1, tag_2, tag_3 in such a way that for example "app" is at tag_1 for all the instances
        tag_processed = set()
        for i1 in range(self.len_ori):
            for tag_value in df_ori.iloc[i1].tag_set:
                if tag_value in tag_processed:
                    continue

                tag_processed.add(tag_value)
                logger.debug("<<<<<<<<>>>>>>>>>>>>")
                logger.debug("%i: %s" % (i1, tag_value))
                logger.debug(df_ori)

                if tag_value in df_ori.at[i1, 'tag_list']:
                    continue  # already inserted this tag

                # find free indices in current list
                if None not in df_ori.at[i1, 'tag_list']:
                    raise Exception("No more space in list for %s" % tag_value)

                # https://stackoverflow.com/a/6294205/4126114
                free_indices = [
                    i for i, x in enumerate(df_ori.at[i1, 'tag_list'])
                    if x is None
                ]

                # find the first free index which is ok for all entries having this tag
                free_chosen = None
                logger.debug("Searching for free index for %s" % tag_value)
                for free_i1 in free_indices:
                    found_conflict = False
                    for i2 in range(self.len_ori):
                        if found_conflict: break
                        if i2 <= i1: continue
                        logger.debug("Checking row %i" % i2)
                        # if tag in set of tags for this 2nd row
                        if tag_value in df_ori.loc[i2].tag_set:
                            # and if the value for this tag is not *already* set
                            if tag_value not in df_ori.loc[i2].tag_list:
                                if df_ori.loc[i2,
                                              'tag_list'][free_i1] is not None:
                                    logger.debug("Found conflict")
                                    found_conflict = True

                    if not found_conflict:
                        logger.debug("Found chosen free index at %i" % free_i1)
                        free_chosen = free_i1
                        break

                # if no free index chosen, raise Exception
                if free_chosen is None:
                    raise Exception(
                        "Conflict found: %s didn't find a free index to use" %
                        (tag_value))

                # otherwise use the chosen index
                df_ori.at[i1, 'tag_list'][free_chosen] = tag_value

                # set this tag for all other rows at "free_chosen"
                for i2 in range(self.len_ori):
                    if i2 <= i1: continue
                    if tag_value in df_ori.loc[i2].tag_set:
                        if tag_value not in df_ori.loc[i2].tag_list:
                            if df_ori.loc[i2,
                                          'tag_list'][free_chosen] is not None:
                                raise Exception(
                                    "Conflict found despite pre-check? %s wants to be at %i but found %s already"
                                    %
                                    (tag_value, free_chosen,
                                     df_ori.loc[i2, 'tag_list'][free_chosen]))

                        df_ori.at[i2, 'tag_list'][free_chosen] = tag_value

        # mesh out the tag_list to tag_1 tag_2 tag_3
        df_ori['tag_1'] = df_ori.tag_list.apply(lambda x: x[0])
        df_ori['tag_2'] = df_ori.tag_list.apply(lambda x: x[1])
        df_ori['tag_3'] = df_ori.tag_list.apply(lambda x: x[2])

        # re-order columns
        df_ori = df_ori.rename(columns={'original': 'instance_name'})
        df_ori = df_ori[[
            'instance_id', 'instance_name', 'tag_1', 'tag_2', 'tag_3'
        ]]

        # done
        #print("")
        #print("tagged")
        #print(df_ori)

        self.df_ori = df_ori
Example #19
    def get_ifi(self, tqdml2_obj):
        # display name of runner
        logger.info(self.description)

        # 0th pass to count
        n_ec2_total = self.ec2_it.count()

        if n_ec2_total == 0:
            import click
            click.secho("No resources found in %s" %
                        self.ec2_it.service_description,
                        fg="red")
            return

        # context for pre listeners
        context_pre = {}
        context_pre['ec2_instances'] = self.ec2_it
        context_pre['region_include'] = self.ec2_it.get_regionInclude()
        context_pre['n_ec2_total'] = n_ec2_total
        context_pre['click_ctx'] = self.ctx
        context_pre['mainManager'] = self

        # call listeners
        for l in self.listeners['pre']:
            context_pre = l(context_pre)
            if context_pre is None:
                raise Exception(
                    "Breaking the chain is not allowed in listener/pre")

        # iterate over all ec2 instances
        sum_capacity = 0
        sum_used = 0
        df_all = []
        ec2_noCloudwatch = []  # FIXME DEPRECATED
        ec2_noCloudtrail = []

        # add some spaces for aligning the progress bars
        desc = "Pass 2/2 through %s" % self.ec2_it.service_description
        desc = "%-50s" % desc

        # Edit 2019-11-12 use "initial=0" instead of "=1". Check more details in a similar note in "cloudtrail_ec2type.py"
        iter_wrap = tqdml2_obj(self.ec2_it,
                               total=n_ec2_total,
                               desc=desc,
                               initial=0)
        for ec2_dict, ec2_id, ec2_launchtime, ec2_obj in iter_wrap:

            # context dict to be passed between listeners
            context_ec2 = {}
            context_ec2['mainManager'] = self
            if 'df_cat' in context_pre:
                context_ec2['df_cat'] = context_pre[
                    'df_cat']  # copy object between contexts
            context_ec2['ec2_dict'] = ec2_dict
            context_ec2['ec2_id'] = ec2_id
            context_ec2['ec2_launchtime'] = ec2_launchtime
            context_ec2['ec2_obj'] = ec2_obj

            try:
                # call listeners
                # Listener can return None to break out of loop,
                # i.e. to stop processing with other listeners
                for l in self.listeners['ec2']:
                    context_ec2 = l(context_ec2)

                    # skip rest of listeners if one of them returned None
                    if context_ec2 is None:
                        logger.debug(
                            "Listener %s is breaking per_resource for resource %s"
                            % (l, ec2_id))
                        break

            except NoCloudtrailException:
                ec2_noCloudtrail.append(ec2_id)

            except IsitfitCliRunnerBreakIterator as e:
                # check request for breaking from the iterator loop
                # eg for isitfit cost optimize --n=1
                logger.debug("Breaking from the per-resource iterator")
                break

        # call listeners
        #logger.info("... done")
        #logger.info("")
        #logger.info("")

        # set up context
        context_all = {}
        context_all['n_ec2_total'] = n_ec2_total
        context_all['mainManager'] = self
        context_all['region_include'] = self.ec2_it.region_include
        if 'df_cat' in context_pre:
            context_all['df_cat'] = context_pre[
                'df_cat']  # copy object between contexts

        # more
        context_all['ec2_noCloudwatch'] = ec2_noCloudwatch  # FIXME DEPRECATED
        context_all['ec2_noCloudtrail'] = ec2_noCloudtrail
        context_all['click_ctx'] = self.ctx

        # call listeners
        for l in self.listeners['all']:
            context_all = l(context_all)
            if context_all is None:
                raise Exception(
                    "Breaking the chain is not allowed in listener/all: %s" %
                    str(l))

        # done
        #logger.info("")
        return context_all
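The control flow above is a chain-of-listeners pipeline: each listener receives a context dict, may enrich it, and passes it on; returning None breaks the per-resource chain but is forbidden for the 'pre' and 'all' listeners. A minimal standalone sketch of the pattern (names hypothetical):

listeners = {'pre': [], 'ec2': [], 'all': []}

def mark_seen(context_ec2):
    context_ec2['seen'] = True
    return context_ec2  # returning None would stop the chain

listeners['ec2'].append(mark_seen)

context_ec2 = {'ec2_id': 'i-123'}
for l in listeners['ec2']:
    context_ec2 = l(context_ec2)
    if context_ec2 is None:
        break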
Example #20
    def send(self, share_email):
        if share_email is None:
            return

        if len(share_email) == 0:
            return

        logger.info("Sending email")

        # pre-process spaces if any
        share_email = [x.strip() for x in share_email]

        # get resources available
        # Update 2019-12-13 no need to register with API after istifit-api /share/email became anonymous
        # self.api_man.register()

        # submit POST http request
        response_json, dt_now = self._send_core(share_email)

        # check response status

        # Update 2019-12-12 Instead of raising an exception and aborting,
        # show the user a prompt to check his/her email
        # and give the program a chance to re-send the email
        import click
        while ((response_json['isitfitapi_status']['code']
                == 'Email verification in progress') and (self.try_again > 0)):
            # https://click.palletsprojects.com/en/7.x/api/?highlight=click%20confirm#click.pause
            click.pause(
                info=
                'A verification link was emailed to you now. Please click the link, then press any key here to continue...',
                err=False)
            self.try_again -= 1
            response_json, dt_now = self._send_core(share_email)

        if response_json['isitfitapi_status']['code'] == 'Email verification in progress':
            # retries exhausted without the email getting verified
            raise IsitfitCliError(
                response_json['isitfitapi_status']['description'], self.ctx)

        # Update 2019-12-12 This code will get handled by apiMan and will never arrive here, so commenting it out
        #if response_json['isitfitapi_status']['code']=='error':
        #    raise IsitfitCliError(response_json['isitfitapi_status']['description'], self.ctx)

        if response_json['isitfitapi_status']['code'] != 'ok':
            response_str = json.dumps(response_json)
            raise IsitfitCliError(
                "Unsupported response from server: %s" % response_str,
                self.ctx)

        # Save the 1st entry as "last-used email"
        # Make sure to save this *after* the verification steps above are done so as to maintain the last *working* email
        self.last_email.set(share_email[0])

        # validate schema
        from schema import SchemaError, Schema, Optional
        register_schema_2 = Schema({'from': str, Optional(str): object})
        try:
            register_schema_2.validate(response_json['isitfitapi_body'])
        except SchemaError as e:
            responseBody_str = json.dumps(response_json['isitfitapi_body'])
            err_msg = "Received response body: %s. Schema error: %s" % (
                responseBody_str, str(e))
            raise IsitfitCliError(err_msg, self.ctx)

        # otherwise proceed
        emailFrom = response_json['isitfitapi_body']['from']
        import click
        click.echo("Email sent from %s to: %s" %
                   (emailFrom, ", ".join(share_email)))
        return
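The schema {'from': str, Optional(str): object} requires a 'from' string and tolerates any other string-keyed entries; a quick check of that behavior (the email address below is a placeholder):

from schema import Optional, Schema, SchemaError

s = Schema({'from': str, Optional(str): object})
s.validate({'from': 'noreply@example.com', 'extra': 123})  # passes
try:
    s.validate({'extra': 123})  # missing 'from'
except SchemaError as e:
    print(e)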
Example #21
    def iterate_core(self, display_tqdm=False):
        fx_l = [
            'service_name', 'service_description', 'paginator_name',
            'paginator_entryJmespath', 'paginator_exception', 'entry_keyId',
            'entry_keyCreated'
        ]
        for fx_i in fx_l:
            # https://stackoverflow.com/a/9058315/4126114
            if fx_i not in self.__class__.__dict__.keys():
                raise Exception("Derived class should set %s" % fx_i)

        # iterate on regions
        import botocore
        import boto3
        import jmespath
        redshift_regions_full = boto3.Session().get_available_regions(
            self.service_name)
        import copy
        redshift_regions_sub = copy.deepcopy(redshift_regions_full)
        # redshift_regions_sub = ['us-west-2'] # FIXME

        if self.filter_region is not None:
            if self.filter_region not in redshift_regions_sub:
                msg_err = "Invalid region specified: %s. Supported values: %s"
                msg_err = msg_err % (self.filter_region,
                                     ", ".join(redshift_regions_sub))
                raise IsitfitCliError(msg_err,
                                      None)  # passing None for click context

            # over-ride
            redshift_regions_sub = [self.filter_region]

        # Before iterating, display a message that skipping some regions due to load from cache
        # The following conditions = region_include was loaded from cache
        if self.regionInclude_ready and len(redshift_regions_sub) != len(
                self.region_include) and not self.displayed_willskip:
            msg1 = "%s: Will skip %i out of %i regions which were either empty or inaccessible. To re-check, delete the local cache file %s"
            msg1 = msg1 % (self.service_description,
                           len(redshift_regions_sub) -
                           len(self.region_include), len(redshift_regions_sub),
                           self.simpleCacheMan.filename)
            import click
            click.echo(colored(msg1, "yellow"))
            self.displayed_willskip = True

        # iterate
        region_iterator = redshift_regions_sub
        if display_tqdm:
            # add some spaces for aligning the progress bars
            desc = "%s, counting in all regions     " % self.service_description
            desc = "%-50s" % desc
            region_iterator = self.tqdmman(region_iterator,
                                           total=len(redshift_regions_sub),
                                           desc=desc)

        for region_name in region_iterator:
            if self.regionInclude_ready and self.filter_region is None:
                if region_name not in self.region_include:
                    # skip since already failed to use it
                    continue

            logger.debug("Region %s" % region_name)
            boto3.setup_default_session(region_name=region_name)

            # boto3 clients
            # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/redshift.html#Redshift.Client.describe_logging_status
            # Update 2019-12-09
            #   Unfolding the iterator can cause a rate limiting error for accounts with more than 200 EC2
            #   as reported by u/moofishies on 2019-11-12
            #   Similar to: https://github.com/boto/botocore/pull/891#issuecomment-303526763
            #   The max_attempts config here is increased from the default 4 to decrease the rate limiting chances
            #   https://github.com/boto/botocore/pull/1260
            #   Note that with each extra retry, an exponential backoff is already implemented inside botocore
            #   More: https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html
            from botocore.config import Config
            service_client = boto3.client(
                self.service_name, config=Config(retries={'max_attempts': 10}))

            # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/cloudwatch.html#metric
            self.cloudwatch_resource = boto3.resource('cloudwatch')

            # iterate on service resources, eg ec2 instances, redshift clusters
            paginator = service_client.get_paginator(self.paginator_name)
            rc_iterator = paginator.paginate()
            try:
                region_anyClusterFound = False
                for rc_describe_page in rc_iterator:
                    rc_describe_entries = jmespath.search(
                        self.paginator_entryJmespath, rc_describe_page)
                    for rc_describe_entry in rc_describe_entries:
                        region_anyClusterFound = True
                        # add field for region
                        rc_describe_entry['Region'] = region_name
                        # yield
                        yield rc_describe_entry

                if not self.regionInclude_ready and self.filter_region is None:
                    if region_anyClusterFound:
                        # only include if found clusters in this region
                        self.region_include.append(region_name)

            except botocore.exceptions.ClientError as e:
                # Exception that means "no access to region"
                if e.response['Error']['Code'] == self.paginator_exception:
                    continue

                # e.g. if the user doesn't have access to arn:aws:redshift:ap-northeast-1:974668457921:cluster:*
                # it could be due to access restricted to specific regions, or to the full redshift service
                # Note: capturing this exception means that the region is no longer included in the iterator, but the loop still proceeds to other regions
                if e.response['Error']['Code'] == 'AccessDenied':
                    self.region_accessdenied.append(e)
                    continue

                # Handle error:
                # botocore.exceptions.ClientError: An error occurred (InvalidClientTokenId) when calling the AssumeRole operation: The security token included in the request is invalid.
                # Not sure what this means, but maybe that a role is not allowed to assume into a region?
                # This error can be raised for example with using my local AWS profile "afc_external_readCur".
                # Here is an excerpt from my ~/.aws/credentials file
                # # Role created in Autofitcloud giving access to shadiakiki1986 to read CUR S3
                # [afc_external_readCur]
                # role_arn = arn:aws:iam::123456789:role/external-read-athena-role-ExternalReadCURRole-abcdef
                # source_profile = a_user_profile_not_a_role
                # region = us-east-1
                if e.response['Error']['Code'] == 'InvalidClientTokenId':
                    continue

                # after setting up the InvalidClientTokenId filter above on the profile afc_external_readCur,
                # faced error: botocore.exceptions.ClientError: An error occurred (UnauthorizedOperation) when calling the DescribeInstances operation: You are not authorized to perform this operation.
                if e.response['Error']['Code'] == 'UnauthorizedOperation':
                    continue

                # all other exceptions raised
                raise e

        # before exiting, check if a count just completed, and mark region_include as usable
        if not self.regionInclude_ready and self.filter_region is None:
            self.regionInclude_ready = True

            # save to cache
            self.simpleCacheMan.save_key(key='region_include',
                                         value=self.region_include)
            self.simpleCacheMan.save_key(key='region_accessdenied',
                                         value=self.region_accessdenied)

        # before exiting, if got some AccessDenied errors, display to user
        # Note 1: originally, I wanted to break the iterator on the 1st AccessDenied error,
        # thinking that it's because the user doesn't have permission to the service as a whole.
        # Later, I figured out that maybe the user has permission to a subset of regions,
        # in which case getting an error on region R1 is normal,
        # and the iterator should still proceed to the next region R2.
        if not self.displayed_accessdenied and len(
                self.region_accessdenied) > 0:
            # 1st part goes to stdout
            msgx = "AWS returned AccessDenied errors on %i out of %i regions. Use `isitfit --verbose ...` and re-run the command for more details"
            msgx = msgx % (len(
                self.region_accessdenied), len(redshift_regions_sub))
            import click
            click.echo(colored(msgx, "yellow"))

            # 2nd part is too long, send it to --verbose
            msg2 = "\n".join(
                ["- %s" % str(e) for e in self.region_accessdenied])
            msgx = "Here are the full error messages:\n%s"
            msgx = msgx % (msg2)
            logger.info(colored(msgx, "yellow"))

            self.displayed_accessdenied = True
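The core pattern here is boto3 pagination plus a jmespath filter per page, with retries raised above the default to soften rate limiting; a minimal standalone version, assuming configured credentials and a default region:

import boto3
import jmespath
from botocore.config import Config

client = boto3.client('redshift', config=Config(retries={'max_attempts': 10}))
paginator = client.get_paginator('describe_clusters')
for page in paginator.paginate():
    for entry in jmespath.search('Clusters[]', page):
        print(entry['ClusterIdentifier'])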
Example #22
    def processPush(self, dryRun: bool):
        # max ec2 per call is 20
        # but just doing 1 at a time for now
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/resourcegroupstaggingapi.html#ResourceGroupsTaggingAPI.Client.tag_resources
        import boto3
        tagging_client = boto3.client('resourcegroupstaggingapi')
        ec2_resource = boto3.resource('ec2')
        account_id = boto3.client('sts').get_caller_identity()['Account']

        import json
        preproc = lambda x: x[sorted(list(x.columns))].set_index('instance_id')
        self.latest_df = preproc(self.latest_df)
        self.csv_df = preproc(self.csv_df)
        from tqdm import tqdm
        runType_prefix = "Dry run" if dryRun else "Live"
        for instance_id, row_new in tqdm(self.csv_df.iterrows(),
                                         total=self.csv_df.shape[0],
                                         desc="Tag CSV row (%s)" %
                                         runType_prefix,
                                         initial=1):
            row_old = self.latest_df.loc[instance_id]
            tags_new = row_new.to_dict()
            tags_old = row_old.to_dict()
            if tags_new == tags_old:
                logger.debug("Skipping %s since no changes" % instance_id)
                continue

            # keeping only changed keys
            keys_dotag = {}
            for k in tags_new:
                if not tags_new[k]:
                    continue  # empty tags are skipped

                if k not in tags_old:
                    keys_dotag[k] = tags_new[k]
                    continue

                if tags_new[k] != tags_old[k]:
                    keys_dotag[k] = tags_new[k]
                    continue

            # proceed with untagging
            keys_untag = []
            for k in tags_old:
                if not tags_old[k]:
                    continue  # empty tags are skipped

                if k not in tags_new:
                    keys_untag.append(k)

            if not keys_dotag and not keys_untag:
                continue

            # if either of them is set:
            instance_obj = ec2_resource.Instance(instance_id)
            # region name = availability zone with its trailing letter dropped
            instance_arn = 'arn:aws:ec2:%s:%s:instance/%s' % (
                instance_obj.placement['AvailabilityZone'][:-1], account_id,
                instance_id)

            if keys_dotag:
                logger.debug(
                    "[%s] Will tag %s with %s" %
                    (runType_prefix, instance_id, json.dumps(keys_dotag)))
                if not dryRun:
                    response = tagging_client.tag_resources(
                        ResourceARNList=[instance_arn], Tags=keys_dotag)

            if keys_untag:
                logger.debug(
                    "[%s] Will untag %s with %s" %
                    (runType_prefix, instance_id, json.dumps(keys_untag)))
                if not dryRun:
                    response = tagging_client.untag_resources(
                        ResourceARNList=[instance_arn], TagKeys=keys_untag)

        if dryRun:
            from termcolor import colored
            logger.info(
                colored(
                    "This was a dry run. Execute the same command again with `--not-dry-run` for actual tags push to aws ec2",
                    "red"))
Example #23
    def suggest(self):
        logger.info("Dumping to csv")
        from .tagsSuggestBasic import dump_df_to_csv
        self.csv_fn = dump_df_to_csv(self.tags_df, 'isitfit-tags-dump-')
Example #24
    def suggest(self):
        logger.info("Uploading ec2 names to s3")

        logger.debug("TagsSuggestAdvanced::suggest")

        # if status is not ok yet, ping again
        if self.api_man.r_register['isitfitapi_status'][
                'code'] == 'Registration in progress':
            self.api_man.register()

        # boto3 s3 client
        s3_client = self.api_man.boto3_session.client('s3')

        import tempfile
        from isitfit.dotMan import DotMan
        with tempfile.NamedTemporaryFile(suffix='.csv',
                                         prefix='isitfit-ec2names-',
                                         delete=True,
                                         dir=DotMan().tempdir()) as fh:
            logger.debug("Will use temporary file %s" % fh.name)
            self.tags_df.to_csv(fh.name, index=False)
            self.s3_key_suffix = 'tags_request.csv'
            s3_path = os.path.join(self.api_man.r_sts['Account'],
                                   self.api_man.r_sts['UserId'],
                                   self.s3_key_suffix)

            logger.debug("s3 PUT bucket=%s path=%s" %
                         (self.api_man.r_body['s3_bucketName'], s3_path))
            s3_client.put_object(Bucket=self.api_man.r_body['s3_bucketName'],
                                 Key=s3_path,
                                 Body=fh)

        # POST /tags/suggest
        r2, dt_now = self._tags_suggest()

        # now listen on sqs
        any_found = False
        for m in self.api_man.listen_sqs('tags suggest', dt_now):
            # if done
            if m is None: break

            # process messages
            any_found = True
            logger.info("Server message: %s" % m.body_decoded['status'])
            if m.body_decoded['status'] != 'calculation complete':
                continue

            # upon calculation complete message
            if 's3_key_suffix' not in m.body_decoded:
                logger.debug("(Missing s3_key_suffix key from body. Aborting)")
                return

            self.csv_fn = None
            from isitfit.dotMan import DotMan
            with tempfile.NamedTemporaryFile(
                    suffix='.csv',
                    prefix='isitfit-tags-suggestAdvanced-',
                    delete=False,
                    dir=DotMan().tempdir()) as fh:
                self.csv_fn = fh.name
                s3_path = os.path.join(self.api_man.r_body['s3_keyPrefix'],
                                       m.body_decoded['s3_key_suffix'])
                logger.info("Downloading tag suggestions from isitfit server")
                logger.debug("Getting s3 file %s" % s3_path)
                logger.debug("Saving it into %s" % fh.name)
                response = s3_client.get_object(
                    Bucket=self.api_man.r_body['s3_bucketName'], Key=s3_path)
                fh.write(response['Body'].read())

            logger.debug("TagsSuggestAdvanced:suggest .. read_csv")
            import pandas as pd
            self.suggested_df = pd.read_csv(self.csv_fn, nrows=MAX_ROWS)

            # count number of rows in csv
            # https://stackoverflow.com/a/36973958/4126114
            logger.debug("TagsSuggestAdvanced:suggest .. count_rows")
            with open(fh.name) as f2:
                self.suggested_shape = [
                    sum(1 for line in f2), 4
                ]  # 4 is just a hardcoded column count; the value doesn't matter much

            logger.debug("TagsSuggestAdvanced:suggest .. done")
            return

        # if nothing returned on sqs
        if not any_found:
            logger.error("Absolute radio silence on sqs :(")

        # either no sqs messages,
        # or found some sqs messages, but none were for tags request fulfilled
        import pandas as pd
        self.suggested_df = pd.DataFrame()
        self.suggested_shape = [0, 4]
        self.csv_fn = None