def delete_file(storage_dir, index, pos_metrics):
    """
    Note: we do not delete the data file itself, we only clear the tags in
    the data file, so the space can be reused by a new metric.
    """
    bucket, schema_name, fid = index
    bucket_data_dir = os.path.join(storage_dir, 'data', bucket)
    filepath = getFilePathByInstanceDir(bucket_data_dir, schema_name, fid)
    with open(filepath, "r+b") as fh:
        header_info = header(fh)
        tag_list = header_info["tag_list"]
        reserved_size = header_info["reserved_size"]
        archive_list = [(a["sec_per_point"], a["count"])
                        for a in header_info["archive_list"]]
        agg_name = Agg.get_agg_name(header_info["agg_id"])

        released_size = 0
        for pos_idx, tag in pos_metrics:
            if tag == tag_list[pos_idx]:
                tag_list[pos_idx] = ""
                released_size += len(tag)
            elif tag_list[pos_idx] != "":
                print >>sys.stderr, "tag not match: (%s, %d)" % (tag, pos_idx)

        if released_size != 0:
            # Pad the freed bytes into the reserved area so the header keeps
            # its original size, then rewrite it in place.
            inter_tag_list = tag_list + ["N" * (reserved_size + released_size)]
            packed_header, _ = pack_header(inter_tag_list,
                                           archive_list,
                                           header_info["x_files_factor"],
                                           agg_name)
            fh.write(packed_header)
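A minimal usage sketch, assuming a hypothetical storage layout: index is the (bucket, schema_name, fid) triple recorded in the index file, and pos_metrics pairs each metric's slot in the file's tag_list with its name. Every concrete name below is made up for illustration.

# Hypothetical call: clear two metric slots in one data file so the space
# can be reused. Bucket, schema name and file id are invented.
pos_metrics = [(0, 'servers.web1.cpu.idle'), (3, 'servers.web1.cpu.user')]
delete_file('/kenshin/storage', ('instance0', 'default', '7'), pos_metrics)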
def main():
    if len(sys.argv) < 3:
        print('Need data_dir and link_dir.\n'
              'e.g.: kenshin-rebuild-link.py /kenshin/data/a /kenshin/link/a')
        sys.exit(1)
    data_dir, link_dir = sys.argv[1:]
    data_dir = os.path.abspath(data_dir)
    link_dir = os.path.abspath(link_dir)
    for schema_name in os.listdir(data_dir):
        hs_file_pat = os.path.join(data_dir, schema_name, '*.hs')
        for fp in glob.glob(hs_file_pat):
            with open(fp) as f:
                header = kenshin.header(f)
                metric_list = header['tag_list']
                for metric in metric_list:
                    if metric != '':
                        try:
                            create_link(metric, link_dir, fp)
                        except OSError as exc:
                            # Skip metrics whose names are too long to symlink.
                            if exc.errno == errno.ENAMETOOLONG:
                                pass
                            else:
                                raise
def rebuildLink(instance_data_dir, instance_link_dir):
    for schema_name in os.listdir(instance_data_dir):
        hs_file_pat = os.path.join(instance_data_dir, schema_name, '*.hs')
        for fp in glob.glob(hs_file_pat):
            with open(fp) as f:
                header = kenshin.header(f)
                metric_list = header['tag_list']
                for metric in metric_list:
                    if metric != '':
                        link_path = getMetricPathByInstanceDir(instance_link_dir, metric)
                        _createLinkHelper(link_path, fp)
def rebuildLink(instance_data_dir, instance_link_dir):
    for schema_name in os.listdir(instance_data_dir):
        hs_file_pat = os.path.join(instance_data_dir, schema_name, '*.hs')
        for fp in glob.glob(hs_file_pat):
            with open(fp) as f:
                header = kenshin.header(f)
                metric_list = header['tag_list']
                for metric in metric_list:
                    if metric != '':
                        link_path = getMetricPathByInstanceDir(instance_link_dir, metric)
                        try:
                            _createLinkHelper(link_path, fp)
                        except OSError as exc:
                            # Ignore metrics whose link paths are too long.
                            if exc.errno == errno.ENAMETOOLONG:
                                pass
                            else:
                                raise
def resize_data_file(schema, data_file):
    print data_file
    with open(data_file) as f:
        header = kenshin.header(f)

    retentions = schema.archives
    old_retentions = [(x['sec_per_point'], x['count'])
                      for x in header['archive_list']]
    msg = []
    action = NO_OPERATION

    # x files factor
    if schema.xFilesFactor != header['x_files_factor']:
        action = CHANGE_META
        msg.append("x_files_factor: %f -> %f" %
                   (header['x_files_factor'], schema.xFilesFactor))

    # agg method
    old_agg_name = Agg.get_agg_name(header['agg_id'])
    if schema.aggregationMethod != old_agg_name:
        action = CHANGE_META
        msg.append("agg_name: %s -> %s" %
                   (old_agg_name, schema.aggregationMethod))

    # retentions
    if retentions != old_retentions:
        action = REBUILD
        msg.append("retentions: %s -> %s" % (old_retentions, retentions))

    if action == NO_OPERATION:
        print "No operation needed."
        return
    elif action == CHANGE_META:
        print 'Change Meta.'
        print '\n'.join(msg)
        change_meta(data_file, schema, header['max_retention'])
        return
    elif action == REBUILD:
        print 'Rebuild File.'
        print '\n'.join(msg)
        rebuild(data_file, schema, header, retentions)
    else:
        raise ValueError(action)
def rebuildIndex(instance_data_dir, instance_index_file):
    """
    Rebuild the index file from the data files. If a data file contains no
    valid metric, remove it.
    """
    out = open(instance_index_file, 'w')
    for schema_name in os.listdir(instance_data_dir):
        hs_file_pat = os.path.join(instance_data_dir, schema_name, '*.hs')
        for fp in glob.glob(hs_file_pat):
            with open(fp) as f:
                empty_flag = True
                header = kenshin.header(f)
                metric_list = header['tag_list']
                file_id = splitext(basename(fp))[0]
                for i, metric in enumerate(metric_list):
                    if metric != '':
                        empty_flag = False
                        out.write('%s %s %s %s\n' %
                                  (metric, schema_name, file_id, i))
            if empty_flag:
                os.remove(fp)
    out.close()
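Each index line written above is a space-separated record: metric name, schema name, data-file id, and the metric's slot in that file's tag_list. An illustrative entry (all names made up) would look like:

# servers.web1.cpu.idle default 7 0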
def run(filepath, archive_idx, point_idx, error):
    with open(filepath) as f:
        header = kenshin.header(f)
        archive = header['archive_list'][archive_idx]
        point_size = header['point_size']
        point_format = header['point_format']
        start_offset = archive['offset'] + point_idx * point_size
        if point_idx < 0:
            # Negative indices count back from the end of the archive.
            start_offset += archive['size']
        point = get_point(f, start_offset, point_size, point_format)
        print 'count: %s' % archive['count']

        if not error:
            metric = get_metric(filepath)
            date_str = timestamp_to_datestr(point[0])
            if metric:
                idx = header['tag_list'].index(metric)
                return (point[0], point[idx + 1]), date_str
            else:
                return point, date_str
        else:
            # Scan forward until two consecutive points are not exactly
            # sec_per_point apart; return the index of the point that breaks
            # the sequence.
            sec_per_point = archive['sec_per_point']
            ts = point[0]
            start_offset += point_size
            point_idx += 1
            while start_offset < archive['size'] + archive['offset']:
                point = get_point(f, start_offset, point_size, point_format)
                if point[0] != ts + sec_per_point:
                    return point_idx
                start_offset += point_size
                point_idx += 1
                ts = point[0]
            return 'No error!'
def getIntervals(self):
    with open(self.fs_path) as f:
        start = time.time() - kenshin.header(f)['max_retention']
        end = max(os.stat(self.fs_path).st_mtime, start)
        return [(start, end)]
def get_intervals(self):
    with open(self.fs_path) as f:
        start = time.time() - kenshin.header(f)['max_retention']
        end = max(os.stat(self.fs_path).st_mtime, start)
        return IntervalSet([Interval(start, end)])
#!/usr/bin/env python
# coding: utf-8
from pprint import pprint
import kenshin

if __name__ == '__main__':
    import sys
    if len(sys.argv) < 2:
        print 'Usage: kenshin-info.py <file_path>'
        sys.exit(1)
    path = sys.argv[1]
    with open(path) as f:
        pprint(kenshin.header(f))
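For orientation, a sketch of the header dict these scripts read. The field names are exactly the keys accessed elsewhere in this code; every concrete value is invented, and point_format in particular is only a guess at the general shape.

# Illustrative header only -- all values below are made up.
example_header = {
    'agg_id': 1,                      # resolved to a name via Agg.get_agg_name()
    'x_files_factor': 0.5,
    'max_retention': 86400,           # seconds covered by the longest archive
    'reserved_size': 64,              # spare tag bytes available for reuse
    'point_size': 12,                 # bytes per on-disk point
    'point_format': '!Lff',           # hypothetical: timestamp plus one value per tag
    'tag_list': ['servers.web1.cpu.idle', ''],   # '' marks a freed slot
    'archive_list': [
        {'sec_per_point': 10, 'count': 8640,
         'retention': 86400, 'offset': 1024, 'size': 103680},
    ],
}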
def resize_data_file(schema, data_file):
    print data_file
    rebuild = False
    with open(data_file) as f:
        header = kenshin.header(f)

    retentions = schema.archives
    old_retentions = [(x['sec_per_point'], x['count'])
                      for x in header['archive_list']]
    msg = ""
    if retentions != old_retentions:
        rebuild = True
        msg += "retentions:\n%s -> %s" % (old_retentions, retentions)

    if not rebuild:
        print "No operation needed."
        return

    print msg
    now = int(time.time())
    tmpfile = data_file + '.tmp'
    if os.path.exists(tmpfile):
        print "Removing previous temporary database file: %s" % tmpfile
        os.unlink(tmpfile)

    print "Creating new kenshin database: %s" % tmpfile
    kenshin.create(tmpfile, [''] * len(header['tag_list']), schema.archives,
                   header['x_files_factor'], Agg.get_agg_name(header['agg_id']))
    for i, t in enumerate(header['tag_list']):
        kenshin.add_tag(t, tmpfile, i)
    size = os.stat(tmpfile).st_size
    old_size = os.stat(data_file).st_size
    print "Created: %s (%d bytes, was %d bytes)" % (tmpfile, size, old_size)

    print "Migrating data to new kenshin database ..."
    for archive in header['archive_list']:
        from_time = now - archive['retention'] + archive['sec_per_point']
        until_time = now
        _, timeinfo, values = kenshin.fetch(data_file, from_time, until_time)
        datapoints = zip(range(*timeinfo), values)
        datapoints = [[p[0], list(p[1])] for p in datapoints if p[1]]
        for _, values in datapoints:
            for i, v in enumerate(values):
                if v is None:
                    values[i] = NULL_VALUE
        kenshin.update(tmpfile, datapoints)

    backup = data_file + ".bak"
    print 'Renaming old database to: %s' % backup
    os.rename(data_file, backup)
    print "Renaming new database to: %s" % data_file
    try:
        os.rename(tmpfile, data_file)
    except Exception as e:
        print "Operation failed, restoring backup"
        os.rename(backup, data_file)
        raise e
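The migration loop above reshapes kenshin.fetch output before calling kenshin.update. A small worked example of that reshaping with invented numbers, assuming timeinfo follows the usual (from_time, until_time, step) convention and each values entry holds one value per tag (or None when the slot is empty):

# Invented inputs, mirroring the reshaping done in the migration loop above.
timeinfo = (1400000000, 1400000030, 10)       # (from_time, until_time, step)
values = [(1.0, 2.0), None, (3.0, None)]      # one tuple per timestamp, or None
datapoints = zip(range(*timeinfo), values)
datapoints = [[p[0], list(p[1])] for p in datapoints if p[1]]
# -> [[1400000000, [1.0, 2.0]], [1400000020, [3.0, None]]]
# Remaining None values are then replaced with NULL_VALUE before kenshin.update().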
def resize_metric(metric, schema, data_dirs):
    rebuild = False
    msg = ""
    path = get_metric_path(metric, data_dirs)
    print path
    with open(path) as f:
        header = kenshin.header(f)

    retentions = schema.archives
    old_retentions = [(r['sec_per_point'], r['count'])
                      for r in header['archive_list']]
    if retentions != old_retentions:
        rebuild = True
        msg += "retentions:\n%s -> %s" % (old_retentions, retentions)

    if not rebuild:
        print 'No Operation Needed.'
    else:
        print msg
        now = int(time.time())

        tmpfile = path + '.tmp'
        if os.path.exists(tmpfile):
            print 'Removing previous temporary database file: %s' % tmpfile
            os.unlink(tmpfile)

        print 'Creating new kenshin database: %s' % tmpfile
        kenshin.create(tmpfile, [''] * len(header['tag_list']), schema.archives,
                       header['x_files_factor'], Agg.get_agg_name(header['agg_id']))
        for i, t in enumerate(header['tag_list']):
            kenshin.add_tag(t, tmpfile, i)
        size = os.stat(tmpfile).st_size
        old_size = os.stat(path).st_size
        print 'Created: %s (%d bytes, was %d bytes)' % (tmpfile, size, old_size)

        print 'Migrating data to new kenshin database ...'
        for archive in header['archive_list']:
            from_time = now - archive['retention'] + archive['sec_per_point']
            until_time = now
            _, timeinfo, values = kenshin.fetch(path, from_time, until_time)
            datapoints = zip(range(*timeinfo), values)
            datapoints = [[p[0], list(p[1])] for p in datapoints if p[1]]
            for ts, values in datapoints:
                for i, v in enumerate(values):
                    if v is None:
                        values[i] = NULL_VALUE
            kenshin.update(tmpfile, datapoints)

        backup = path + '.bak'
        print 'Renaming old database to: %s' % backup
        os.rename(path, backup)
        print 'Renaming new database to: %s' % path
        try:
            os.rename(tmpfile, path)
        except:
            os.rename(backup, path)
            raise IOError('Operation failed, restoring backup')