Example #1
0
def archive(project, tablename, datestring):
    """
    move the raw data file of project/tablename/datestring to the archive
    directory, but only if it is safe to do so.

    the raw file is moved only when it is already gzipped (*.csv.gz) and
    every derived cache (tsa, tsastat, ts, quantile) exists, so no
    information is lost by removing the raw file from the working area.

    relies on module level names: basedir, args (args.archivedir), DataLogger

    parameters:
    project    <str> project name
    tablename  <str> table name within project
    datestring <str> date of the data, used as archive sub-directory

    returns:
    None
    """
    datalogger = DataLogger(basedir, project, tablename)
    caches = datalogger.get_caches(datestring)
    suffix = "%s/%s/%s\t" % (datestring, project, tablename)
    # guard clauses: bail out early on every condition that forbids archiving
    if caches["tsa"]["raw"] is None:
        logging.debug("%s RAW Data not found", suffix)
        return
    if not os.path.isfile(caches["tsa"]["raw"]):
        logging.info("%s RAW does not exist, maybe archived or deleted", suffix)
        return
    logging.info("%s found raw file %s", suffix, caches["tsa"]["raw"])
    # raw filename layout: <tablename>_<datestring>.<ending>
    filebasename = os.path.basename(caches["tsa"]["raw"])
    parts = filebasename.split("_")
    filetablename = filebasename.replace("_%s" % parts[-1], "")
    filedatestring = parts[-1].split(".")[0]
    filesuffix = ".".join(parts[-1].split(".")[1:])
    logging.info("found tablename %s, datestring %s, ending %s", filetablename, filedatestring, filesuffix)
    if (filetablename != tablename) or (filedatestring != datestring):
        # the cache entry points at a file that does not belong to this
        # project/tablename/datestring combination - do not touch it
        logging.error("the referenced raw file seems not to be the correct one")
        return
    if filesuffix != "csv.gz":
        # only already-gzipped raw files are eligible for archiving
        return
    logging.info("raw file already zipped, this seems not to be the actual one")
    if (len(caches["tsa"]["keys"]) > 0) and (len(caches["tsastat"]["keys"]) > 0) and (len(caches["ts"]["keys"]) > 0) and (caches["quantile"]["exists"] is True):
        logging.info("%s all generated archives found, raw data could be archived", suffix)
        archivepath = os.path.join(args.archivedir, datestring, project, tablename)
        archivefilename = os.path.join(archivepath, os.path.basename(caches["tsa"]["raw"]))
        if not os.path.isdir(archivepath):
            logging.info("creating directory %s", archivepath)
            os.makedirs(archivepath)
        logging.info("%s moving raw file to %s", suffix, archivefilename)
        shutil.move(caches["tsa"]["raw"], archivefilename)
    else:
        logging.info("%s not all archives available, generate them first, before archiving raw data", suffix)
Example #2
0
def gen_caches(project, tablename, datestring):
    """
    generate any missing derived cache (tsa, tsastat, ts, quantile) for
    the given project/tablename/datestring combination by calling the
    appropriate DataLogger load_* method.

    nothing is generated when the raw data is missing; in that case only
    the reason is logged.

    relies on module level names: basedir, DataLogger

    parameters:
    project    <str> project name
    tablename  <str> table name within project
    datestring <str> date of the data to generate caches for

    returns:
    None
    """
    datalogger = DataLogger(basedir, project, tablename)
    caches = datalogger.get_caches(datestring)
    suffix = "%s/%s/%s\t" % (datestring, project, tablename)
    if caches["tsa"]["raw"] is None:
        # no raw data available - nothing can be generated
        # BUGFIX: the original logged these two messages on the opposite
        # branches (claiming "tsa exists already" when tsa keys were empty)
        if len(caches["tsa"]["keys"]) == 0:
            logging.info("%s RAW Data is missing, no tsa archive exists", suffix)
        else:
            logging.debug("%s RAW Data not available maybe archived, tsa exists already", suffix)
        return
    data = None
    if len(caches["tsa"]["keys"]) == 0:
        # no tsa archive yet - build it (this implies the other caches too)
        logging.info("%s TSA Archive missing, calling get_tsa and load_tsastats", suffix)
        data = datalogger.load_tsa(datestring)
    else:
        if len(caches["tsastat"]["keys"]) == 0:
            logging.info("%s TSASTAT Archive missing, calling load_tsastats", suffix)
            data = datalogger.load_tsastats(datestring)
        elif len(caches["ts"]["keys"]) == 0:
            logging.info("%s there are no ts archives, something went wrong, or tsa is completely empty, calling load_tsastats", suffix)
            data = datalogger.load_tsastats(datestring)
        else:
            logging.debug("%s All fine", suffix)
        # quantile is checked independently of tsastat/ts state
        if caches["quantile"]["exists"] is not True:
            logging.info("%s Quantile archive is missing, calling load_quantile", suffix)
            data = datalogger.load_quantile(datestring)
    del data  # potentially large object, release explicitly
Example #3
0
    def get_caches(args):
        """
        return JSON encoded dictionary of the caches available for one
        project/tablename/datestring combination

        ex: Datalogger/get_caches/{project}/{tablename}/{datestring}

        {
            "tsastat" : {
                "keys" : dictionary of available keys,
                "pattern" : filename pattern,
            },
            "tsstat" : {
                "keys" : dictionary of available keys,
                "pattern" : filename pattern,
            },
            "tsa" : {
                "keys" : dictionary of available keys,
                "pattern" : filename pattern,
            },
            "ts" : {
                "keys" : dictionary of available keys,
                "pattern" : filename pattern,
            },
            "raw" : None or filename of raw data,
        }

        if return_data["raw"] == null there is no raw data available,
        otherwise, if something (tsa, ts, tsastat, tsstat) is missing, call
        get_tsastat to generate all caches

        returns:
        json(dictionary of caches and available data)
        """
        project = args[0]
        tablename = args[1]
        datestring = args[2]
        logger = DataLogger(basedir, project, tablename)
        result = {}
        try:
            result = logger.get_caches(datestring)
        except StandardError as exc:
            # report the failure but still return whatever was collected
            logging.exception(exc)
            logging.error(result)
        return json.dumps(result)