Example #1
def saveRunData(path, config=None, feas_dict=None, rdd=None, batchSize=10):
    if path[-1] == '/':
        path = path[0:-1]

    # needed when the path is not an S3 path, because the "with open()" context assumes the folder already exists
    if not das_utils.isS3Path(path):
        das_utils.makePath(path)

    if config is not None:
        config_path = path + "/config.ini"
        logging.debug("Saving config to directory: {}".format(config_path))
        das_utils.saveConfigFile(config_path, config)

    if rdd is not None:
        logging.debug("Pickle Batch Size: {}".format(batchSize))
        data_path = path + "/data"
        logging.debug("Saving data to directory: {}".format(data_path))
        das_utils.savePickledRDD(data_path, rdd, batchSize=batchSize)

    if feas_dict is not None:
        for key in feas_dict.keys():
            feas_dict[key] = feas_dict[key].value  # this seems redundant, but is actually needed for the accumulator
        logging.info("Feasibility dictionary: {}".format(feas_dict))
        feas_path = path + "/feas_dict.json"
        logging.debug("Saving feas_dict to directory: {}".format(feas_path))
        das_utils.saveJSONFile(feas_path, feas_dict)
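
The "needed for the accumulator" comment refers to Spark accumulators: the values in feas_dict may be pyspark Accumulator objects, which are not JSON-serializable, so .value is read on the driver before saveJSONFile is called. A minimal, self-contained sketch of that pattern (the key name and counts are made up, not taken from the source):

# Minimal sketch (illustrative only): why feas_dict values need .value before
# being written out as JSON. The key name and counts are hypothetical.
import json
from pyspark import SparkContext

sc = SparkContext("local[1]", "feas-demo")
feas_dict = {"infeasible_count": sc.accumulator(0)}   # Accumulator, not an int
acc = feas_dict["infeasible_count"]

# Workers update the accumulator; the total is only readable on the driver.
sc.parallelize(range(5)).foreach(lambda _: acc.add(1))

# json.dumps(feas_dict) would fail, since Accumulator is not JSON-serializable.
plain = {k: v.value for k, v in feas_dict.items()}    # .value pulls the driver-side total
print(json.dumps(plain))                              # {"infeasible_count": 5}
sc.stop()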
Example #2
    def saveRunData(self, path, feas_dict=None, rdd=None):
        self.annotate("saveRunData", verbose=True)
        if path[-1] == '/':
            path = path[0:-1]

        # RDD must be saved first, because it needs an empty prefix.
        if rdd is not None:
            output_datafile_name = os.path.join(path, self.output_datafname)

            if self.overwrite_flag:
                das_utils.clearPath(output_datafile_name)

            # needed when the path is not an S3 path, because the "with open()" context assumes the folder already exists
            if not das_utils.isS3Path(output_datafile_name):
                das_utils.makePath(output_datafile_name)

            output_metadata_file_name = output_datafile_name + "/0_metadata"  # sorts before 'p'
            output_header_file_name = output_datafile_name + "/1_header"  # sorts before 'p' but after '0_metadata'
            self.annotate(f"writing RDD to {output_datafile_name}")
            self.saveRDD(output_datafile_name, rdd)

            if self.write_metadata:
                now = datetime.datetime.now().isoformat()
                self.saveMetadata(path=output_metadata_file_name,
                                  now=now,
                                  count=rdd.count())
                self.saveHeader(path=output_header_file_name)

            if self.s3cat:
                self.annotate(f"combining {output_datafile_name} with s3cat")
                s3cat.s3cat(output_datafile_name,
                            demand_success=True,
                            suffix=self.s3cat_suffix,
                            verbose=self.s3cat_verbose)
                self.add_output_path(output_datafile_name + self.s3cat_suffix)
            else:
                self.add_output_path(output_datafile_name)

        config_path = os.path.join(path, C.CONFIG_INI)
        self.annotate("Saving config to directory: {}".format(config_path))
        das_utils.saveConfigFile(config_path, self.config)

        if feas_dict is not None:
            for key in feas_dict.keys():
                if hasattr(feas_dict[key], 'value'):
                    feas_dict[key] = feas_dict[key].value  # this seems redundant, but is actually needed for the accumulator
            self.log_and_print(f"Feasibility dictionary: {feas_dict}")
            feas_path = os.path.join(path, C.FEAS_DICT_JSON)
            self.annotate(f"Saving feas_dict to directory: {feas_path}")
            das_utils.saveJSONFile(feas_path, feas_dict)
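
The "0_metadata" / "1_header" names rely on lexicographic ordering: the comments above suggest the files under the output prefix are later concatenated in name order (the s3cat step), so the metadata and header land ahead of Spark's part-* files. A quick illustration with plain sorted():

# Illustration (not from the source) of the sort-order trick behind the
# "0_metadata" and "1_header" filenames used above.
files = ["part-00001", "1_header", "part-00000", "0_metadata"]
print(sorted(files))
# ['0_metadata', '1_header', 'part-00000', 'part-00001']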
Example #3
    def saveNoisyAnswers(self, nodes: __NodeDict__, postfix: str = "") -> None:
        """
        Save RDDs with geonodes as pickle files, by geolevel
        :param nodes: RDD or by-geolevel dictionary of noisy nodes RDDs
        :param postfix: postfix to add to default filename (e.g. "_ms" for minimal_schema run)
        :return:
        """
        for level, nodes_rdd in nodes.items():
            path = self.noisyPath(self.app_id, level, postfix)
            das_utils.savePickledRDD(path, nodes_rdd)

        das_utils.saveConfigFile(
            os.path.join(self.saveloc,
                         f"{self.app_id}-bylevel_pickled_rdds.config"),
            self.config)
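
das_utils.savePickledRDD is not shown here; a plausible reading of the call pattern above is that it boils down to RDD.saveAsPickleFile. A stand-alone sketch of the per-geolevel save loop in plain PySpark, with hypothetical paths and node payloads:

# Stand-alone sketch of the per-geolevel save loop. The paths, node payloads,
# and the assumption that savePickledRDD wraps RDD.saveAsPickleFile are all
# illustrative, not taken from the source.
from pyspark import SparkContext

sc = SparkContext("local[1]", "noisy-save-demo")
nodes = {
    "State":  sc.parallelize([{"geocode": "01"}]),
    "County": sc.parallelize([{"geocode": "01001"}, {"geocode": "01003"}]),
}
for level, nodes_rdd in nodes.items():
    path = f"/tmp/noisy-demo/{level}"    # stand-in for self.noisyPath(app_id, level, postfix)
    nodes_rdd.saveAsPickleFile(path)     # stand-in for das_utils.savePickledRDD(path, nodes_rdd)
sc.stop()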
Example #4
    def saveNoisyAnswers(self,
                         nodes: __NodeDict__,
                         repart_by_parent=True,
                         postfix: str = "") -> None:
        """
        Save RDDs with geonodes as pickle files, by geolevel
        :param nodes: RDD or by-geolevel dictionary of noisy nodes RDDs
        :param repart_by_parent: if True, repartition each RDD by parent geocode before saving
        :param postfix: postfix to add to default filename (e.g. "_ms" for minimal_schema run)
        :return:
        """
        if self.setup.dvs_enabled:
            from programs.python_dvs.dvs import DVS_Singleton
            dvs_singleton = DVS_Singleton()
        else:
            dvs_singleton = None

        noisy_partitions_dict = self.setup.noisy_partitions_dict

        for level, nodes_rdd in nodes.items():
            self.annotate(f"Saving {level}{postfix} noisy measurements")
            path = self.noisyPath(self.app_id, level, postfix)
            num_noisy_parts = noisy_partitions_dict[level]
            rdd2save = nodes_rdd
            if repart_by_parent:
                self.annotate("Repartitioning by parent geocode")
                rdd2save = das_utils.partitionByParentGeocode(
                    nodes_rdd, nodes_rdd.getNumPartitions())
            elif num_noisy_parts > 0:
                self.annotate(
                    f"NOTE: NOT actually coalescing noisy measurements to {num_noisy_parts} parts"
                )
                # Coalescing is currently disabled:
                # rdd2save = nodes_rdd.coalesce(num_noisy_parts)
            rdd2save = rdd2save.map(lambda node: node.zipNoisy())
            das_utils.savePickledRDD(path,
                                     rdd2save,
                                     dvs_singleton=dvs_singleton)

        das_utils.saveConfigFile(
            os.path.join(self.saveloc,
                         f"{self.app_id}-bylevel_pickled_rdds.config"),
            self.config)
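
das_utils.partitionByParentGeocode is also not shown; its name and the surrounding annotations suggest it groups child nodes that share a parent geocode into the same partition. Under that assumption, the idea can be sketched with an ordinary keyed partitionBy (the geocodes and the 5-character parent prefix are made up):

# Sketch of the presumed idea behind das_utils.partitionByParentGeocode:
# hash-partition on the parent geocode so siblings land in the same partition.
from pyspark import SparkContext

sc = SparkContext("local[2]", "repart-demo")
nodes_rdd = sc.parallelize([
    {"geocode": "0100100001"},
    {"geocode": "0100100002"},
    {"geocode": "0100300001"},
])

def parent_geocode(node, parent_len=5):
    # parent_len is an illustrative choice, not the real geolevel width
    return node["geocode"][:parent_len]

rdd2save = (nodes_rdd
            .keyBy(parent_geocode)                      # (parent_geocode, node) pairs
            .partitionBy(nodes_rdd.getNumPartitions())  # hash-partition on the parent key
            .values())                                  # drop the key again
print(rdd2save.glom().map(len).collect())               # per-partition node counts
sc.stop()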
Example #5
    def saveRunData(self, path, feas_dict=None, rdd=None):
        self.annotate("saveRunData", verbose=True)
        if path[-1] == '/':
            path = path[0:-1]

        # RDD must be saved first, because it needs an empty prefix.
        if rdd is not None:
            output_datafile_name      = os.path.join(path, self.output_datafname)

            if self.overwrite_flag:
                das_utils.clearPath(output_datafile_name)

            # needed when the path is not an S3 path, because the "with open()" context assumes the folder already exists
            if not das_utils.isS3Path(output_datafile_name):
                das_utils.makePath(output_datafile_name)

            output_metadata_file_name = output_datafile_name+"/0_metadata"  # sorts before 'p'
            output_header_file_name   = output_datafile_name+"/1_header"    # sorts before 'p' but after '0_metadata'
            self.annotate(f"writing RDD to {output_datafile_name}")
            self.saveRDD(output_datafile_name, rdd)

            if self.write_metadata:
                now = datetime.datetime.now().isoformat()
                self.saveMetadata(path=output_metadata_file_name, now=now, count=rdd.count())
                self.saveHeader(path=output_header_file_name)

            if self.s3cat:
                # If we combine the data with s3cat, note the combined filename in the
                # annotated output, the DFXML file, and the DVS object, then do it.

                self.annotate(f"combining {output_datafile_name} with s3cat")

                # Record this with DFXML
                ET.SubElement(self.das.dfxml_writer.doc, CC.DAS_S3CAT,
                              {'output_datafile_name':output_datafile_name,
                               'demand_success':'True',
                               'suffix':self.s3cat_suffix,
                               'verbose':str(self.s3cat_verbose)})

                self.add_output_path(output_datafile_name + self.s3cat_suffix)
                s3cat.s3_cat(output_datafile_name)
            else:
                # Otherwise just note the prefix in DFS and DFXML
                ET.SubElement(self.das.dfxml_writer.doc, CC.DAS_OUTPUT).text = output_datafile_name + "/"
                self.add_output_path(output_datafile_name + "/")


        config_path = os.path.join(path, f"{self.output_datafname}_{CC.CONFIG_INI}")

        self.annotate("Saving the flattened config to directory: {}".format(config_path))
        das_utils.saveConfigFile(config_path, self.config)
        f = io.StringIO()
        self.config.write(f)
        ET.SubElement(self.das.dfxml_writer.doc, CC.DAS_CONFIG).text = f.getvalue()


        if feas_dict is not None:
            for key in feas_dict.keys():
                if hasattr(feas_dict[key], 'value'):
                    feas_dict[key] = feas_dict[key].value  # this seems redundant, but is actually needed for the accumulator
            self.log_and_print(f"Feasibility dictionary: {feas_dict}")
            feas_path = os.path.join(path, f"{self.output_datafname}_{CC.FEAS_DICT_JSON}")
            self.annotate(f"Saving feas_dict to directory: {feas_path}")
            das_utils.saveJSONFile(feas_path, feas_dict)
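
The io.StringIO round-trip above exists because ConfigParser.write only accepts a file-like object; the resulting string is then embedded as the text of a DFXML element. The same pattern with only the standard library (the "dfxml" and "config" tag names are placeholders for the dfxml_writer document and CC.DAS_CONFIG):

# Stand-alone sketch of the config-into-XML pattern above, using only the
# standard library. Section contents and tag names are illustrative.
import io
import configparser
import xml.etree.ElementTree as ET

config = configparser.ConfigParser()
config["writer"] = {"overwrite_flag": "True", "s3cat": "False"}

doc = ET.Element("dfxml")                 # stands in for self.das.dfxml_writer.doc
buf = io.StringIO()
config.write(buf)                         # ConfigParser.write needs a file-like object
ET.SubElement(doc, "config").text = buf.getvalue()
print(ET.tostring(doc, encoding="unicode"))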