Example #1
def delete_file(storage_dir, index, pos_metrics):
    """
    Note: we do not delete the data file itself; we only clear its tags,
    so the space can be reused by a new metric.
    """
    bucket, schema_name, fid = index
    bucket_data_dir = os.path.join(storage_dir, 'data', bucket)
    filepath = getFilePathByInstanceDir(bucket_data_dir, schema_name, fid)

    with open(filepath, "r+b") as fh:
        header_info = header(fh)
        tag_list = header_info["tag_list"]
        reserved_size = header_info["reserved_size"]
        archive_list = [(a["sec_per_point"], a["count"])
                        for a in header_info["archive_list"]]
        agg_name = Agg.get_agg_name(header_info["agg_id"])

        released_size = 0
        for pos_idx, tag in pos_metrics:
            if tag == tag_list[pos_idx]:
                tag_list[pos_idx] = ""
                released_size += len(tag)
            elif tag_list[pos_idx] != "":
            print >> sys.stderr, "tag mismatch: (%s, %d)" % (tag, pos_idx)

        if released_size != 0:
            inter_tag_list = tag_list + ["N" * (reserved_size + released_size)]
            packed_header, _ = pack_header(inter_tag_list, archive_list,
                                           header_info["x_files_factor"],
                                           agg_name)
            fh.write(packed_header)
Example #2
def merge_metrics(meta, metric_paths, metric_names, output_file):
    ''' Merge metrics to a kenshin file.
    '''
    # Get the content (data points grouped by archive) of each metric.
    metrics_archives_points = [
        get_metric_content(path, metric)
        for (path, metric) in zip(metric_paths, metric_names)
    ]

    # Merge metrics to a kenshin file
    with open(output_file, 'wb') as f:
        archives = meta['archive_list']
        archive_info = [(archive['sec_per_point'], archive['count'])
                        for archive in archives]
        inter_tag_list = metric_names + ['']  # for reserved space

        # header
        packed_kenshin_header = Storage.pack_header(
            inter_tag_list,
            archive_info,
            meta['x_files_factor'],
            Agg.get_agg_name(meta['agg_id']),
            )[0]
        f.write(packed_kenshin_header)

        for i, archive in enumerate(archives):
            archive_points = [x[i] for x in metrics_archives_points]
            merged_points = merge_points(archive_points)
            points = fill_gap(merged_points, archive, len(meta['tag_list']))
            packed_str = packed_kenshin_points(points)
            f.write(packed_str)
Example #3
    def add_tag(tag, path, pos_idx):
        with open(path, 'r+b') as fh:
            header_info = Storage.header(fh)
            tag_list = header_info['tag_list']
            reserved_size = header_info['reserved_size']

            archive_list = [(a['sec_per_point'], a['count'])
                            for a in header_info['archive_list']]
            agg_name = Agg.get_agg_name(header_info['agg_id'])

            if len(tag) <= len(tag_list[pos_idx]) + reserved_size:
                diff = len(tag_list[pos_idx]) + reserved_size - len(tag)
                tag_list[pos_idx] = tag
                inter_tag_list = tag_list + ['N' * diff]
                packed_header, _ = Storage.pack_header(
                    inter_tag_list, archive_list,
                    header_info['x_files_factor'], agg_name)
                fh.write(packed_header)
            else:
                tag_list[pos_idx] = tag
                inter_tag_list = tag_list + ['']
                packed_header, _ = Storage.pack_header(
                    inter_tag_list, archive_list,
                    header_info['x_files_factor'], agg_name)
                tmpfile = path + '.tmp'
                with open(tmpfile, 'wb') as fh_tmp:
                    fh_tmp.write(packed_header)
                    fh.seek(header_info['archive_list'][0]['offset'])
                    while True:
                        bytes = fh.read(CHUNK_SIZE)
                        if not bytes:
                            break
                        fh_tmp.write(bytes)
                os.rename(tmpfile, path)
Example #4
def change_meta(data_file, schema, max_retention):
    with open(data_file, 'r+b') as f:
        format = '!2Lf'
        agg_id = Agg.get_agg_id(schema.aggregationMethod)
        xff = schema.xFilesFactor
        packed_data = struct.pack(format, agg_id, max_retention, xff)
        f.write(packed_data)
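
change_meta overwrites the first bytes of the data file with the '!2Lf'-packed triple (agg_id, max_retention, x_files_factor). A minimal sketch for reading those fields back, assuming the file really does begin with that same layout (the path below is a placeholder, not a real kenshin filename):

import struct

META_FORMAT = '!2Lf'  # same layout change_meta writes: agg_id, max_retention, xff
with open('some_metric.dat', 'rb') as f:  # placeholder path
    raw = f.read(struct.calcsize(META_FORMAT))
agg_id, max_retention, xff = struct.unpack(META_FORMAT, raw)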
Example #5
    def test_header(self):
        metric_name, tag_list, archive_list, x_files_factor, agg_name = self.basic_setup
        with open(self.path, 'rb') as f:
            header = self.storage.header(f)

        self.assertEqual(tag_list, header['tag_list'])
        self.assertEqual(x_files_factor, header['x_files_factor'])
        self.assertEqual(Agg.get_agg_id(agg_name), header['agg_id'])

        _archive_list = [(x['sec_per_point'], x['count'])
                         for x in header['archive_list']]
        self.assertEqual(archive_list, _archive_list)
Example #6
def resize_data_file(schema, data_file):
    print data_file
    with open(data_file) as f:
        header = kenshin.header(f)
    retentions = schema.archives
    old_retentions = [(x['sec_per_point'], x['count'])
                      for x in header['archive_list']]
    msg = []
    action = NO_OPERATION

    # x files factor
    if schema.xFilesFactor != header['x_files_factor']:
        action = CHANGE_META
        msg.append("x_files_factor: %f -> %f" %
                   (header['x_files_factor'], schema.xFilesFactor))

    # agg method
    old_agg_name = Agg.get_agg_name(header['agg_id'])
    if schema.aggregationMethod != old_agg_name:
        action = CHANGE_META
        msg.append("agg_name: %s -> %s" %
                   (old_agg_name, schema.aggregationMethod))

    # retentions
    if retentions != old_retentions:
        action = REBUILD
        msg.append("retentions: %s -> %s" % (old_retentions, retentions))

    if action == NO_OPERATION:
        print "No operation needed."
        return

    elif action == CHANGE_META:
        print 'Change Meta.'
        print '\n'.join(msg)
        change_meta(data_file, schema, header['max_retention'])
        return

    elif action == REBUILD:
        print 'Rebuild File.'
        print '\n'.join(msg)
        rebuild(data_file, schema, header, retentions)

    else:
        raise ValueError(action)
Example #7
    def _get_agg_value(self, higher_points, tag_cnt, agg_id, ts_start, ts_end):
        higher_points = higher_points[::-1]
        agg_func = Agg.get_agg_func(agg_id)
        step = tag_cnt + 1

        # points format:
        # t1 v11 v12,
        # t2 v21 v22,
        # t3 v31 v32,
        points = [
            higher_points[i:i + step]
            for i in xrange(0, len(higher_points), step)
        ]
        valid_points = self.filter_points_by_time(points, ts_start, ts_end)
        if not valid_points:
            val = [NULL_VALUE] * tag_cnt
        else:
            points = np.array(valid_points)
            points = points.transpose()
            val = [agg_func(self.filter_values(x)) for x in points[1:]]
        return val
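
The list comprehension above slices the flat higher_points sequence into rows of step = tag_cnt + 1 values (one timestamp followed by one value per tag), matching the "t1 v11 v12" layout sketched in the comment. A tiny illustration with tag_cnt = 2 and made-up numbers:

higher_points = [30, 3.0, 3.5, 20, 2.0, 2.5, 10, 1.0, 1.5]  # already reversed, newest first
step = 3  # tag_cnt (2) + 1 timestamp column
points = [higher_points[i:i + step] for i in xrange(0, len(higher_points), step)]
# points == [[30, 3.0, 3.5], [20, 2.0, 2.5], [10, 1.0, 1.5]]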
Example #8
    def pack_header(inter_tag_list, archive_list, x_files_factor, agg_name):
        # tag
        tag = str('\t'.join(inter_tag_list))

        # metadata
        agg_id = Agg.get_agg_id(agg_name)
        max_retention = reduce(operator.mul, archive_list[-1], 1)
        xff = x_files_factor
        archive_cnt = len(archive_list)
        tag_size = len(tag)
        point_size = struct.calcsize(POINT_FORMAT % (len(inter_tag_list) - 1))
        metadata = struct.pack(METADATA_FORMAT, agg_id, max_retention, xff,
                               archive_cnt, tag_size, point_size)

        # archive_info
        header = [metadata, tag]
        offset = METADATA_SIZE + len(
            tag) + ARCHIVEINFO_SIZE * len(archive_list)

        for sec, cnt in archive_list:
            archive_info = struct.pack(ARCHIVEINFO_FORMAT, offset, sec, cnt)
            header.append(archive_info)
            offset += point_size * cnt
        return ''.join(header), offset
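
The offset loop above places the first archive's points right after the metadata block, the tag string, and all archive-info records, and starts each later archive point_size * count bytes after the previous one. A sketch of that arithmetic with made-up sizes (the real METADATA_SIZE, ARCHIVEINFO_SIZE, and point_size come from kenshin's struct formats, not these placeholders):

# Placeholder sizes, only to show how the offsets advance per archive.
METADATA_SIZE, ARCHIVEINFO_SIZE, point_size = 20, 12, 16
tag = 'cpu.user\tcpu.idle'                # two tags joined by '\t'
archive_list = [(10, 8640), (60, 10080)]  # (sec_per_point, count) pairs

offset = METADATA_SIZE + len(tag) + ARCHIVEINFO_SIZE * len(archive_list)
archive_offsets = []
for sec_per_point, count in archive_list:
    archive_offsets.append((sec_per_point, offset))  # where this archive's points begin
    offset += point_size * count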
Example #9
 def test_get_agg_id(self):
     for i, agg in enumerate(Agg.get_agg_type_list()):
         id_ = Agg.get_agg_id(agg)
         self.assertEqual(id_, i)
Example #10
 def _get_agg_func_by_name(self, name):
     return Agg.get_agg_func(Agg.get_agg_id(name))
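
Examples #9 and #10 suggest that aggregation names get their integer ids from their position in Agg.get_agg_type_list(), and that get_agg_name and get_agg_func look the method back up by that id. A minimal round-trip sketch; the import path is a guess, since the snippets above only reference Agg directly:

from kenshin.agg import Agg  # assumed module path, adjust to the actual package layout

for i, name in enumerate(Agg.get_agg_type_list()):
    agg_id = Agg.get_agg_id(name)
    assert agg_id == i                        # ids follow list position, as the test above checks
    assert Agg.get_agg_name(agg_id) == name   # name -> id -> name round-trips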
Example #11
def resize_data_file(schema, data_file):
    print data_file
    rebuild = False
    with open(data_file) as f:
        header = kenshin.header(f)
    retentions = schema.archives
    old_retentions = [(x['sec_per_point'], x['count'])
                      for x in header['archive_list']]
    msg = ""
    if retentions != old_retentions:
        rebuild = True
        msg += "retentions:\n%s -> %s" % (old_retentions, retentions)

    if not rebuild:
        print "No operation needed."
        return

    print msg
    now = int(time.time())
    tmpfile = data_file + '.tmp'
    if os.path.exists(tmpfile):
        print "Removing previous temporary database file: %s" % tmpfile
        os.unlink(tmpfile)

    print "Creating new kenshin database: %s" % tmpfile
    kenshin.create(tmpfile,
                   [''] * len(header['tag_list']),
                   schema.archives,
                   header['x_files_factor'],
                   Agg.get_agg_name(header['agg_id']))
    for i, t in enumerate(header['tag_list']):
        kenshin.add_tag(t, tmpfile, i)

    size = os.stat(tmpfile).st_size
    old_size = os.stat(data_file).st_size

    print "Created: %s (%d bytes, was %d bytes)" % (
        tmpfile, size, old_size)

    print "Migrating data to new kenshin database ..."
    for archive in header['archive_list']:
        from_time = now - archive['retention'] + archive['sec_per_point']
        until_time = now
        _, timeinfo, values = kenshin.fetch(data_file, from_time, until_time)
        datapoints = zip(range(*timeinfo), values)
        datapoints = [[p[0], list(p[1])] for p in datapoints if p[1]]
        for _, values in datapoints:
            for i, v in enumerate(values):
                if v is None:
                    values[i] = NULL_VALUE
        kenshin.update(tmpfile, datapoints)
    backup = data_file + ".bak"

    print 'Renaming old database to: %s' % backup
    os.rename(data_file, backup)

    print "Renaming new database to: %s" % data_file
    try:
        os.rename(tmpfile, data_file)
    except Exception as e:
        print "Operation failed, restoring backup"
        os.rename(backup, data_file)
        raise e
Example #12
def resize_metric(metric, schema, data_dirs):
    rebuild = False
    msg = ""

    path = get_metric_path(metric, data_dirs)
    print path
    with open(path) as f:
        header = kenshin.header(f)
    retentions = schema.archives
    old_retentions = [(r['sec_per_point'], r['count'])
                      for r in header['archive_list']]

    if retentions != old_retentions:
        rebuild = True
        msg += "retentions:\n%s -> %s" % (retentions, old_retentions)

    if not rebuild:
        print 'No Operation Needed.'
    else:
        print msg
        now = int(time.time())

        tmpfile = path + '.tmp'
        if os.path.exists(tmpfile):
            print 'Removing previous temporary database file: %s' % tmpfile
            os.unlink(tmpfile)

        print 'Creating new kenshin database: %s' % tmpfile
        kenshin.create(tmpfile, [''] * len(header['tag_list']),
                       schema.archives, header['x_files_factor'],
                       Agg.get_agg_name(header['agg_id']))
        for i, t in enumerate(header['tag_list']):
            kenshin.add_tag(t, tmpfile, i)

        size = os.stat(tmpfile).st_size
        old_size = os.stat(path).st_size  # size of the original file being replaced
        print 'Created: %s (%d bytes, was %d bytes)' % (tmpfile, size,
                                                        old_size)

        print 'Migrating data to new kenshin database ...'
        for archive in header['archive_list']:
            from_time = now - archive['retention'] + archive['sec_per_point']
            until_time = now
            _, timeinfo, values = kenshin.fetch(path, from_time, until_time)
            datapoints = zip(range(*timeinfo), values)
            datapoints = [[p[0], list(p[1])] for p in datapoints if p[1]]
            for ts, values in datapoints:
                for i, v in enumerate(values):
                    if v is None:
                        values[i] = NULL_VALUE
            kenshin.update(tmpfile, datapoints)

        backup = path + '.bak'
        print 'Renaming old database to: %s' % backup
        os.rename(path, backup)

        print 'Renaming new database to: %s' % path
        try:
            os.rename(tmpfile, path)
        except:
            os.rename(backup, path)
            raise IOError('Operation failed, restoring backup')