コード例 #1
0
ファイル: aggregate.py プロジェクト: kartvep/Combaine
def Main(groupname, config_name, agg_config_name, previous_time, current_time):
    """Aggregate stored per-host data of a group and pass it to result handlers.

    For every host returned by ``split_hosts_by_dc(groupname)`` the value
    stored under the key
    ``<host>;<config_name>;<previous_time>;<current_time>;<aggregator name>``
    is read from the distributed storage (dots and dashes in the host name
    are replaced by underscores). The values are grouped per host subgroup
    and per aggregator, each aggregator combines its groups via
    ``aggregate_group``, and the resulting ``AggRes`` objects are sent to
    every configured result handler.

    Args:
        groupname: name of the host group to aggregate.
        config_name: parsing config name; also selects the storage
            namespace ``combaine_mid/<config_name>``.
        agg_config_name: aggregation config name.
        previous_time: start of the period (formatted with ``%i``).
        current_time: end of the period (formatted with ``%i``).

    Returns:
        ``'failed'`` when the distributed storage cannot be created or
        connected, ``"Success"`` otherwise. An exception raised by a result
        handler is only logged and does not change the return value.
    """
    task_id = hashlib.md5(
        "%s%s%s%i%i" % (groupname, config_name, agg_config_name, previous_time, current_time)
    ).hexdigest()[:10]
    logger = AggregateLogger(task_id)
    logger.info("Start aggregation: %s %s %s %i-%i" % (groupname, config_name, agg_config_name, previous_time, current_time))

    conf = ParsingConfigurator(config_name, agg_config_name)

    ds = DistributedStorageFactory(**conf.ds)  # Get Distributed storage
    if ds is None:
        logger.error('Failed to init distributed storage like MongoRS')
        return 'failed'

    if not ds.connect('combaine_mid/%s' % config_name):
        logger.error('Cannot connect to distributed storage like MongoRS')
        return 'failed'

    # From here on the storage is connected: make sure it is closed even if
    # a factory, a read or an aggregator raises.
    try:
        # Factories may return None for a handler; such entries are skipped.
        res_handlers = []
        for handler_cfg in conf.resulthadlers:
            handler = ResultHandlerFactory(**handler_cfg)
            if handler is not None:
                res_handlers.append(handler)

        # Aggregators indexed by their name.
        aggs = {}
        for agg_config in conf.aggregators:
            aggregator = AggregatorFactory(**agg_config)
            aggs[aggregator.name] = aggregator

        hosts = split_hosts_by_dc(groupname)

        # One dict per host subgroup: aggregator name -> list of stored values.
        all_data = []
        for subgroup in hosts.values():
            data_by_subgrp = collections.defaultdict(list)
            for host in subgroup:
                host_key = host.replace('-', '_').replace('.', '_')
                for agg_name in aggs:
                    storage_key = "%s;%s;%i;%i;%s" % (
                        host_key, config_name, previous_time, current_time, agg_name)
                    data_by_subgrp[agg_name].append(ds.read(storage_key))
            all_data.append(dict(data_by_subgrp))

        res = []
        for key in aggs:
            # Collect this aggregator's data from every subgroup that has it.
            grouped = [subgroup_data[key] for subgroup_data in all_data if key in subgroup_data]
            one_agg_result = AggRes(aggs[key].name, hosts.keys(), conf.metahost or groupname, agg_config_name)
            # Only the first item produced by aggregate_group is stored.
            one_agg_result.store_result(next(aggs[key].aggregate_group(grouped)))
            res.append(one_agg_result)

        # Pass the aggregated results to every result handler.
        logger.info("Hadling data by result handlers")
        print(res_handlers)
        try:
            for handler in res_handlers:
                handler.send(res)
        except Exception as err:
            # The first failing handler stops the loop; the error is logged only.
            logger.exception(err)
    finally:
        ds.close()

    logger.info("Aggregation has finished successfully")
    return "Success"
コード例 #2
0
ファイル: parsing.py プロジェクト: kartvep/Combaine
def Main(host_name, config_name, group_name, previous_time, current_time):
    """Fetch, parse and pre-aggregate data of one host and store the results.

    The data fetcher returns raw data for ``host_name`` and the given period,
    the configured parser turns it into items, and items rejected by
    ``df.filter`` or equal to ``None`` are dropped. Aggregators of type
    ``RAW`` get a data grid backend (the items are put into a table there);
    aggregators of type ``PROCESSED`` get the items directly. Every
    aggregator result is inserted into the distributed storage under the key
    ``<host>;<config_name>;<previous_time>;<current_time>;<result[0]>``
    (dots and dashes in the host name are replaced by underscores).

    Args:
        host_name: host whose data is fetched.
        config_name: parsing config name; also selects the storage
            namespace ``combaine_mid/<config_name>``.
        group_name: group name, used for the task id and the local table name.
        previous_time: start of the period (formatted with ``%i``).
        current_time: end of the period (formatted with ``%i``).

    Returns:
        ``'success'`` or a string starting with ``'failed; '`` that names
        the step which failed.
    """
    # Reload the parsers module on every call so updated parsing functions
    # are picked up.
    reload(parsers)
    task_id = hashlib.md5(
        "%s%s%s%i%i" % (host_name, config_name, group_name, previous_time, current_time)
    ).hexdigest()[:10]
    logger = ParsingLogger(task_id)
    logger.info("Start parsing: %s %s %s %i %i" %(host_name, config_name, group_name, previous_time, current_time))
    conf = ParsingConfigurator(config_name)

    # Construct parser function
    parser = PARSERS.get(conf.parser, None)
    if parser is None:
        logger.error('No properly parser available')
        return "failed; No parser"

    # Construct Distributed Storage
    ds = DistributedStorageFactory(**conf.ds)
    if ds is None:
        logger.error('Failed to init distributed storage like MongoRS')
        return 'failed; DS init Error'
    if not ds.connect('combaine_mid/%s' % config_name):
        logger.error('Cannot connect to distributed storage like MongoRS')
        return 'failed; Connect to DS'

    # The storage is connected from here on: close it on every exit path,
    # including the early 'failed' returns and unexpected exceptions.
    try:
        # Construct Data Fetcher
        df = FetcherFactory(**conf.df)
        if df is None:
            logger.error('%s Failed to init datafetcher' % task_id)
            return 'failed; Failed to init DF'

        # Construct aggregators
        aggs = [AggregatorFactory(**agg_config) for agg_config in conf.aggregators]

        # Fetch data
        data = df.getData(host_name, (previous_time, current_time))
        if not data:
            logger.warning('%s Empty data from datafetcher' % task_id)
            return 'failed; Empty data from DF'

        # Keep items accepted by the fetcher's filter, then drop None items.
        handle_data = [item for item in parser(data)
                       if df.filter(item) and item is not None]
        if not handle_data:
            logger.info("Zero size of handling data list after parsing and filter")
            return 'failed; Zero size of handling data list after parsing and filter'

        # RAW aggregators work on a data grid: attach the backend and load
        # the parsed items into a per-host table.
        raw_aggs = [agg for agg in aggs if agg.agg_type == TYPES.index("RAW")]
        if raw_aggs:
            db = DataGridFactory(**conf.db)
            if db is None:
                logger.error('Failed to init local databse')
                return 'failed; Failed to init DG'

            for agg in raw_aggs:
                agg.set_datagrid_backend(db)

            tablename = ''.join(group_name[:30]) + hashlib.md5('%s_%s_%s' % (config_name, group_name, host_name)).hexdigest()
            if not db.putData(handle_data, tablename):
                logger.warning('Empty data for localdb')
                return 'failed; No data for local db'

        # PROCESSED aggregators receive the parsed items directly.
        for agg in aggs:
            if agg.agg_type == TYPES.index("PROCESSED"):
                agg.set_data(handle_data)

        # Run all aggregators first, then store each result; result[0] is
        # used as the aggregator-name part of the key, result[1] as the value.
        results = [agg.aggregate((previous_time, current_time)) for agg in aggs]
        host_key = host_name.replace('.', '_').replace('-', '_')
        insert_statuses = []
        for result in results:
            storage_key = "%(host)s;%(conf)s;%(time)s;%(etime)s;%(aggname)s" % {
                'host': host_key,
                'conf': config_name,
                'time': previous_time,
                'etime': current_time,
                'aggname': result[0],
            }
            insert_statuses.append(ds.insert(storage_key, result[1]))
        logger.debug("Send data to storage: %s" % insert_statuses)
    finally:
        ds.close()

    logger.info('Parsing has finished successfully')
    return 'success'