def delete_file(storage_dir, index, pos_metrics):
    """
    Note: we do not delete the data file itself, we only clear the
    matching tags in the data file, so the space can be reused by
    new metrics.
    """
    bucket, schema_name, fid = index
    bucket_data_dir = os.path.join(storage_dir, 'data', bucket)
    filepath = getFilePathByInstanceDir(bucket_data_dir, schema_name, fid)
    with open(filepath, "r+b") as fh:
        header_info = header(fh)
        tag_list = header_info["tag_list"]
        reserved_size = header_info["reserved_size"]
        archive_list = [(a["sec_per_point"], a["count"])
                        for a in header_info["archive_list"]]
        agg_name = Agg.get_agg_name(header_info["agg_id"])

        released_size = 0
        for pos_idx, tag in pos_metrics:
            if tag == tag_list[pos_idx]:
                tag_list[pos_idx] = ""
                released_size += len(tag)
            elif tag_list[pos_idx] != "":
                print >>sys.stderr, "tag not match: (%s, %d)" % (tag, pos_idx)

        if released_size != 0:
            inter_tag_list = tag_list + ["N" * (reserved_size + released_size)]
            packed_header, _ = pack_header(inter_tag_list, archive_list,
                                           header_info["x_files_factor"],
                                           agg_name)
            fh.write(packed_header)
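
# Usage sketch (illustrative only; the index triple and pos_metrics pairs
# below are hypothetical): delete_file clears the matching tag slots in the
# bucket's data file and grows the reserved header area by the released
# bytes, so a later metric can take over the slot.
index = ('bucket0', 'default', 42)                 # (bucket, schema_name, fid)
pos_metrics = [(0, 'cpu.idle'), (1, 'cpu.user')]   # (position, tag) pairs
delete_file('/data/kenshin', index, pos_metrics)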
def merge_metrics(meta, metric_paths, metric_names, output_file):
    '''Merge metrics to a kenshin file.'''
    # Get the content (data points grouped by archive) of each metric.
    metrics_archives_points = [
        get_metric_content(path, metric)
        for (path, metric) in zip(metric_paths, metric_names)
    ]

    # Merge the metrics into one kenshin file.
    with open(output_file, 'wb') as f:
        archives = meta['archive_list']
        archive_info = [(archive['sec_per_point'], archive['count'])
                        for archive in archives]
        inter_tag_list = metric_names + ['']  # trailing entry for reserved space

        # header
        packed_kenshin_header = Storage.pack_header(
            inter_tag_list,
            archive_info,
            meta['x_files_factor'],
            Agg.get_agg_name(meta['agg_id']),
        )[0]
        f.write(packed_kenshin_header)

        # archives
        for i, archive in enumerate(archives):
            archive_points = [x[i] for x in metrics_archives_points]
            merged_points = merge_points(archive_points)
            points = fill_gap(merged_points, archive, len(meta['tag_list']))
            packed_str = packed_kenshin_points(points)
            f.write(packed_str)
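
# Illustrative call (paths, file names, and the 'average' aggregation name
# are assumptions, not taken from this module): merge two single-metric
# files whose schemas match `meta` into one multi-tag kenshin file.
meta = {
    'archive_list': [{'sec_per_point': 10, 'count': 8640}],
    'x_files_factor': 0.5,
    'agg_id': Agg.get_agg_id('average'),
    'tag_list': ['cpu.idle', 'cpu.user'],
}
merge_metrics(meta,
              ['/data/a.hs', '/data/b.hs'],
              ['cpu.idle', 'cpu.user'],
              '/data/merged.hs')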
def add_tag(tag, path, pos_idx):
    with open(path, 'r+b') as fh:
        header_info = Storage.header(fh)
        tag_list = header_info['tag_list']
        reserved_size = header_info['reserved_size']
        archive_list = [(a['sec_per_point'], a['count'])
                        for a in header_info['archive_list']]
        agg_name = Agg.get_agg_name(header_info['agg_id'])

        if len(tag) <= len(tag_list[pos_idx]) + reserved_size:
            # The new tag fits in place: patch the header, keeping the total
            # header size unchanged by padding the reserved area.
            diff = len(tag_list[pos_idx]) + reserved_size - len(tag)
            tag_list[pos_idx] = tag
            inter_tag_list = tag_list + ['N' * diff]
            packed_header, _ = Storage.pack_header(
                inter_tag_list, archive_list,
                header_info['x_files_factor'], agg_name)
            fh.write(packed_header)
        else:
            # The new tag does not fit: write a new file with a larger
            # header, copy the archive data over chunk by chunk, then
            # atomically replace the original.
            tag_list[pos_idx] = tag
            inter_tag_list = tag_list + ['']
            packed_header, _ = Storage.pack_header(
                inter_tag_list, archive_list,
                header_info['x_files_factor'], agg_name)
            tmpfile = path + '.tmp'
            with open(tmpfile, 'wb') as fh_tmp:
                fh_tmp.write(packed_header)
                fh.seek(header_info['archive_list'][0]['offset'])
                while True:
                    chunk = fh.read(CHUNK_SIZE)
                    if not chunk:
                        break
                    fh_tmp.write(chunk)
            os.rename(tmpfile, path)
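
# Example (hypothetical path and slot): replace the tag at position 1.
# If 'mem.used.percent' fits within the old tag plus the reserved space,
# the header is patched in place; otherwise the file is rebuilt through
# a '.tmp' file and renamed over the original.
add_tag('mem.used.percent', '/data/kenshin/data/bucket0/default/42.hs', 1)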
def change_meta(data_file, schema, max_retention):
    with open(data_file, 'r+b') as f:
        # Overwrite only the leading (agg_id, max_retention, xff) fields of
        # the metadata; the rest of the header is left untouched.
        fmt = '!2Lf'
        agg_id = Agg.get_agg_id(schema.aggregationMethod)
        xff = schema.xFilesFactor
        packed_data = struct.pack(fmt, agg_id, max_retention, xff)
        f.write(packed_data)
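
# Worked size check: '!2Lf' is big-endian, two unsigned 32-bit ints plus
# one 32-bit float, i.e. 12 bytes. Changing the aggregation method or
# x_files_factor therefore rewrites only the first 12 bytes of the header.
import struct
assert struct.calcsize('!2Lf') == 12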
def test_header(self):
    metric_name, tag_list, archive_list, x_files_factor, agg_name = self.basic_setup
    with open(self.path, 'rb') as f:
        header = self.storage.header(f)
        self.assertEqual(tag_list, header['tag_list'])
        self.assertEqual(x_files_factor, header['x_files_factor'])
        self.assertEqual(Agg.get_agg_id(agg_name), header['agg_id'])
        _archive_list = [(x['sec_per_point'], x['count'])
                         for x in header['archive_list']]
        self.assertEqual(archive_list, _archive_list)
def resize_data_file(schema, data_file):
    print data_file
    with open(data_file) as f:
        header = kenshin.header(f)

    retentions = schema.archives
    old_retentions = [(x['sec_per_point'], x['count'])
                      for x in header['archive_list']]
    msg = []
    action = NO_OPERATION

    # x-files factor
    if schema.xFilesFactor != header['x_files_factor']:
        action = CHANGE_META
        msg.append("x_files_factor: %f -> %f" %
                   (header['x_files_factor'], schema.xFilesFactor))

    # aggregation method
    old_agg_name = Agg.get_agg_name(header['agg_id'])
    if schema.aggregationMethod != old_agg_name:
        action = CHANGE_META
        msg.append("agg_name: %s -> %s" %
                   (old_agg_name, schema.aggregationMethod))

    # retentions
    if retentions != old_retentions:
        action = REBUILD
        msg.append("retentions: %s -> %s" % (old_retentions, retentions))

    if action == NO_OPERATION:
        print "No operation needed."
        return
    elif action == CHANGE_META:
        print 'Change Meta.'
        print '\n'.join(msg)
        change_meta(data_file, schema, header['max_retention'])
        return
    elif action == REBUILD:
        print 'Rebuild File.'
        print '\n'.join(msg)
        rebuild(data_file, schema, header, retentions)
    else:
        raise ValueError(action)
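
# Hypothetical invocation (DemoSchema and the path are made up for the
# example): `schema` mirrors a carbon storage-schema entry with archives,
# xFilesFactor, and aggregationMethod. Metadata-only differences are patched
# in place via change_meta; a retention change triggers a full rebuild.
class DemoSchema(object):
    archives = [(10, 8640), (60, 10080)]
    xFilesFactor = 0.5
    aggregationMethod = 'average'

resize_data_file(DemoSchema(), '/data/kenshin/data/bucket0/default/42.hs')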
def _get_agg_value(self, higher_points, tag_cnt, agg_id, ts_start, ts_end):
    higher_points = higher_points[::-1]
    agg_func = Agg.get_agg_func(agg_id)
    step = tag_cnt + 1
    # points format (one timestamp followed by one value per tag):
    #   t1 v11 v12,
    #   t2 v21 v22,
    #   t3 v31 v32,
    points = [higher_points[i:i + step]
              for i in xrange(0, len(higher_points), step)]
    valid_points = self.filter_points_by_time(points, ts_start, ts_end)
    if not valid_points:
        val = [NULL_VALUE] * tag_cnt
    else:
        points = np.array(valid_points)
        points = points.transpose()
        val = [agg_func(self.filter_values(x)) for x in points[1:]]
    return val
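
# Worked example of the chunking above (2 tags, so step == 3): a flat,
# chronological sequence splits into (timestamp, v_tag1, v_tag2) rows.
flat = [10, 1.0, 10.0, 20, 2.0, 20.0, 30, 3.0, 30.0]
step = 3
rows = [flat[i:i + step] for i in xrange(0, len(flat), step)]
# rows == [[10, 1.0, 10.0], [20, 2.0, 20.0], [30, 3.0, 30.0]]
# Transposing the rows lets agg_func run per tag column; e.g. an average
# aggregation over these rows would yield [2.0, 20.0].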
def pack_header(inter_tag_list, archive_list, x_files_factor, agg_name):
    # tag
    tag = str('\t'.join(inter_tag_list))

    # metadata
    agg_id = Agg.get_agg_id(agg_name)
    max_retention = reduce(operator.mul, archive_list[-1], 1)
    xff = x_files_factor
    archive_cnt = len(archive_list)
    tag_size = len(tag)
    point_size = struct.calcsize(POINT_FORMAT % (len(inter_tag_list) - 1))
    metadata = struct.pack(METADATA_FORMAT, agg_id, max_retention, xff,
                           archive_cnt, tag_size, point_size)

    # archive_info
    header = [metadata, tag]
    offset = METADATA_SIZE + len(tag) + ARCHIVEINFO_SIZE * len(archive_list)
    for sec, cnt in archive_list:
        archive_info = struct.pack(ARCHIVEINFO_FORMAT, offset, sec, cnt)
        header.append(archive_info)
        offset += point_size * cnt

    return ''.join(header), offset
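
# Offset arithmetic, illustrated. The concrete sizes come from the module
# constants; the numbers below assume METADATA_SIZE == 24 and
# ARCHIVEINFO_SIZE == 12 purely for the sake of the example. With a 10-byte
# tag string and two archives, the first archive's points start right after
# metadata + tags + both archive-info records, and the second archive starts
# point_size * count bytes later:
#   offset0 = 24 + 10 + 12 * 2              # == 58
#   offset1 = offset0 + point_size * cnt0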
def test_get_agg_id(self):
    for i, agg in enumerate(Agg.get_agg_type_list()):
        id_ = Agg.get_agg_id(agg)
        self.assertEqual(id_, i)
def _get_agg_func_by_name(self, name):
    return Agg.get_agg_func(Agg.get_agg_id(name))
def resize_data_file(schema, data_file):
    print data_file
    rebuild = False
    with open(data_file) as f:
        header = kenshin.header(f)

    retentions = schema.archives
    old_retentions = [(x['sec_per_point'], x['count'])
                      for x in header['archive_list']]
    msg = ""
    if retentions != old_retentions:
        rebuild = True
        msg += "retentions:\n%s -> %s" % (old_retentions, retentions)

    if not rebuild:
        print "No operation needed."
        return

    print msg
    now = int(time.time())
    tmpfile = data_file + '.tmp'
    if os.path.exists(tmpfile):
        print "Removing previous temporary database file: %s" % tmpfile
        os.unlink(tmpfile)

    print "Creating new kenshin database: %s" % tmpfile
    kenshin.create(tmpfile, [''] * len(header['tag_list']), schema.archives,
                   header['x_files_factor'],
                   Agg.get_agg_name(header['agg_id']))
    for i, t in enumerate(header['tag_list']):
        kenshin.add_tag(t, tmpfile, i)

    size = os.stat(tmpfile).st_size
    old_size = os.stat(data_file).st_size
    print "Created: %s (%d bytes, was %d bytes)" % (tmpfile, size, old_size)

    print "Migrating data to new kenshin database ..."
    for archive in header['archive_list']:
        from_time = now - archive['retention'] + archive['sec_per_point']
        until_time = now
        _, timeinfo, values = kenshin.fetch(data_file, from_time, until_time)
        datapoints = zip(range(*timeinfo), values)
        datapoints = [[p[0], list(p[1])] for p in datapoints if p[1]]
        for _, vals in datapoints:
            for i, v in enumerate(vals):
                if v is None:
                    vals[i] = NULL_VALUE
        kenshin.update(tmpfile, datapoints)

    backup = data_file + ".bak"
    print 'Renaming old database to: %s' % backup
    os.rename(data_file, backup)
    print "Renaming new database to: %s" % data_file
    try:
        os.rename(tmpfile, data_file)
    except Exception as e:
        print "Operation failed, restoring backup"
        os.rename(backup, data_file)
        raise e
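
# Post-resize sanity check (hypothetical path): the tag list and
# x_files_factor should survive the rebuild even though retentions changed,
# since they are copied from the old header into the new file.
import kenshin
with open('/data/kenshin/data/bucket0/default/42.hs') as f:
    new_header = kenshin.header(f)
print new_header['tag_list'], new_header['x_files_factor']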
def resize_metric(metric, schema, data_dirs):
    rebuild = False
    msg = ""
    path = get_metric_path(metric, data_dirs)
    print path
    with open(path) as f:
        header = kenshin.header(f)

    retentions = schema.archives
    old_retentions = [(r['sec_per_point'], r['count'])
                      for r in header['archive_list']]
    if retentions != old_retentions:
        rebuild = True
        msg += "retentions:\n%s -> %s" % (old_retentions, retentions)

    if not rebuild:
        print 'No Operation Needed.'
    else:
        print msg
        now = int(time.time())
        tmpfile = path + '.tmp'
        if os.path.exists(tmpfile):
            print 'Removing previous temporary database file: %s' % tmpfile
            os.unlink(tmpfile)

        print 'Creating new kenshin database: %s' % tmpfile
        kenshin.create(tmpfile, [''] * len(header['tag_list']),
                       schema.archives, header['x_files_factor'],
                       Agg.get_agg_name(header['agg_id']))
        for i, t in enumerate(header['tag_list']):
            kenshin.add_tag(t, tmpfile, i)

        size = os.stat(tmpfile).st_size
        old_size = os.stat(path).st_size
        print 'Created: %s (%d bytes, was %d bytes)' % (tmpfile, size, old_size)

        print 'Migrating data to new kenshin database ...'
        for archive in header['archive_list']:
            from_time = now - archive['retention'] + archive['sec_per_point']
            until_time = now
            _, timeinfo, values = kenshin.fetch(path, from_time, until_time)
            datapoints = zip(range(*timeinfo), values)
            datapoints = [[p[0], list(p[1])] for p in datapoints if p[1]]
            for ts, vals in datapoints:
                for i, v in enumerate(vals):
                    if v is None:
                        vals[i] = NULL_VALUE
            kenshin.update(tmpfile, datapoints)

        backup = path + '.bak'
        print 'Renaming old database to: %s' % backup
        os.rename(path, backup)
        print 'Renaming new database to: %s' % path
        try:
            os.rename(tmpfile, path)
        except OSError:
            os.rename(backup, path)
            raise IOError('Operation failed, restoring backup')