def archive(project, tablename, datestring):
    datalogger = DataLogger(basedir, project, tablename)
    caches = datalogger.get_caches(datestring)
    suffix = "%s/%s/%s\t" % (datestring, project, tablename)
    if caches["tsa"]["raw"] is None:
        logging.debug("%s RAW Data not found", suffix)
    else:
        if not os.path.isfile(caches["tsa"]["raw"]):
            logging.info("%s RAW does not exist, maybe archived or deleted", suffix)
            return
        logging.info("%s found raw file %s", suffix, caches["tsa"]["raw"])
        # split the raw filename into tablename, datestring and suffix parts
        filebasename = os.path.basename(caches["tsa"]["raw"])
        parts = filebasename.split("_")
        filetablename = filebasename.replace("_%s" % parts[-1], "")
        filedatestring = parts[-1].split(".")[0]
        filesuffix = ".".join(parts[-1].split(".")[1:])
        logging.info("found tablename %s, datestring %s, ending %s", filetablename, filedatestring, filesuffix)
        if (filetablename != tablename) or (filedatestring != datestring):
            logging.error("the referenced raw file does not seem to be the correct one")
        else:
            if filesuffix == "csv.gz":
                logging.info("raw file already zipped, this seems not to be the actual one")
            # archive the raw file only if every derived archive already exists
            if (len(caches["tsa"]["keys"]) > 0) and (len(caches["tsastat"]["keys"]) > 0) and (len(caches["ts"]["keys"]) > 0) and (caches["quantile"]["exists"] is True):
                logging.info("%s all generated archives found, raw data can be archived", suffix)
                archivepath = os.path.join(args.archivedir, datestring, project, tablename)
                archivefilename = os.path.join(archivepath, os.path.basename(caches["tsa"]["raw"]))
                if not os.path.isdir(archivepath):
                    logging.info("creating directory %s", archivepath)
                    os.makedirs(archivepath)
                logging.info("%s moving raw file to %s", suffix, archivefilename)
                shutil.move(caches["tsa"]["raw"], archivefilename)
            else:
                logging.info("%s not all archives available, generate them first before archiving raw data", suffix)
    del caches
    del datalogger
def gen_caches(project, tablename, datestring):
    datalogger = DataLogger(basedir, project, tablename)
    caches = datalogger.get_caches(datestring)
    suffix = "%s/%s/%s\t" % (datestring, project, tablename)
    data = None
    if caches["tsa"]["raw"] is None:
        if len(caches["tsa"]["keys"]) > 0:
            logging.info("%s RAW Data not available, maybe archived, tsa exists already", suffix)
        else:
            logging.debug("%s RAW Data is missing, no tsa archive exists", suffix)
    else:
        # generate missing archives step by step, from tsa down to quantile
        if len(caches["tsa"]["keys"]) == 0:
            logging.info("%s TSA Archive missing, calling get_tsa and load_tsastats", suffix)
            data = datalogger.load_tsa(datestring)
        elif len(caches["tsastat"]["keys"]) == 0:
            logging.info("%s TSASTAT Archive missing, calling load_tsastats", suffix)
            data = datalogger.load_tsastats(datestring)
        else:
            if len(caches["ts"]["keys"]) == 0:
                logging.info("%s there are no ts archives, something went wrong or tsa is completely empty, calling load_tsastats", suffix)
                data = datalogger.load_tsastats(datestring)
            else:
                logging.debug("%s All fine", suffix)
            if caches["quantile"]["exists"] is not True:
                logging.info("%s Quantile archive is missing, calling load_quantile", suffix)
                data = datalogger.load_quantile(datestring)
    del data
    del caches
    del datalogger
def get_caches(args):
    """
    return dictionary of caches available for this project/tablename/datestring combination

    ex: Datalogger/get_caches/{project}/{tablename}/{datestring}

    {
        "tsastat" : {
            "keys" : dictionary of available keys,
            "pattern" : filename pattern,
        },
        "tsstat" : {
            "keys" : dictionary of available keys,
            "pattern" : filename pattern,
        },
        "tsa" : {
            "keys" : dictionary of available keys,
            "pattern" : filename pattern,
        },
        "ts" : {
            "keys" : dictionary of available keys,
            "pattern" : filename pattern,
        },
        "raw" : None or filename of raw data,
    }

    if return_data["raw"] == null there is no raw data available,
    else if something (tsa, ts, tsastat, tsstat) is missing you can call
    get_tsastat to generate all caches

    returns: json(dictionary of caches and available data)
    """
    project, tablename, datestring = args[:3]
    datalogger = DataLogger(basedir, project, tablename)
    caches = {}
    try:
        caches = datalogger.get_caches(datestring)
    except StandardError as exc:
        logging.exception(exc)
        logging.error(caches)
    return json.dumps(caches)
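# Usage sketch (not part of the original module): how the helpers above might be
# chained for one project/tablename/datestring combination. Assumptions: "basedir"
# and "args" (an argparse namespace providing args.archivedir) are module-level
# globals set up elsewhere, and the example values below are purely hypothetical.
def process_day_example(project, tablename, datestring):
    # report which caches already exist for this combination
    logging.info("caches: %s", get_caches((project, tablename, datestring)))
    # generate any missing tsa/tsastat/ts/quantile archives first
    gen_caches(project, tablename, datestring)
    # only after all derived archives exist will archive() move the raw file away
    archive(project, tablename, datestring)

# example call with hypothetical names:
# process_day_example("sanportal", "fcIfC3AccountingTable", "2015-07-01")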