# Example 1
 def render_save(self, varlist, outputfile=None):
     """Render *varlist* and write the rendered text to *outputfile*.

     Returns the output file name that was written to.
     """
     rendered_text = self.render(varlist)
     sink = Output(outputfile)
     sink.write(rendered_text)
     sink.close()
     return outputfile
# Example 2
 def walk(self):
     """Walk the target path, fingerprint selected files, and write a tree.

     Opens the collector (self.co) and an Output for self.kind, prints
     progress to stdout, writes a "target : <path>" header, then walks
     self.__path writing an indented directory/file tree to the output
     file.  Files for which self.checking() returns 2 additionally get an
     MD5 digest stored via self.co.into().  Both the collector and the
     output are closed at the end.
     """
     self.co.create()
     output = Output(self.kind)
     output.now_dir()
     print '[+] now path: ' + str(output.nowpath())
     output.out_dir()
     output.out_file()
     print '[+] target path: ' + str(self.__path)
     header = "target : " + self.__path + '\n'
     print '[+] please wait ...'
     output.write(header)
     # Indent each directory by 4 spaces per level below the target path.
     for root, dirs, files in os.walk(self.__path):
         level = root.replace(self.__path, '').count(os.sep)
         indent = ' ' * 4 * (level)
         out = '{0}--=> {1}/'.format(indent, os.path.basename(root)) + '\n'
         output.write(out)
         subindent = ' ' * 4 * (level + 1)
         for f in files:
             #c_path = os.path.basename(root) + '/' + f
             f_name = f
             f_path = os.path.join(root, f)
             # NOTE(review): checking() == 2 apparently marks files that
             # should be fingerprinted -- confirm against its definition.
             if self.checking(f_path) == 2:
                 f_md5 = self.md5(f_path)
                 self.co.into(self.kind, f_name, f_path, f_md5)
             out = '{0}=> {1}'.format(subindent, f) + '\n'
             output.write(out)
     self.co.close()
     output.close()
def run(*datasources, **options):
    """Execute the given Robot Framework data sources with given options.

    Positional arguments are paths to test data files or directories,
    just as when running pybot/jybot from the command line.  Keyword
    arguments correspond to the long command line options with the
    hyphens dropped.

    Examples:
    run('/path/to/tests.html')
    run('/path/to/tests.html', '/path/to/tests2.html', log='mylog.html')

    Equivalent command line usage:
    pybot /path/to/tests.html
    pybot --log mylog.html /path/to/tests.html /path/to/tests2.html
    """
    STOP_SIGNAL_MONITOR.start()
    exec_settings = RobotSettings(options)
    LOGGER.register_console_logger(exec_settings['MonitorWidth'],
                                   exec_settings['MonitorColors'])
    init_global_variables(exec_settings)
    test_suite = TestSuite(datasources, exec_settings)
    result_output = Output(exec_settings)
    test_suite.run(result_output)
    LOGGER.info("Tests execution ended. Statistics:\n%s"
                % test_suite.get_stat_message())
    result_output.close(test_suite)
    # Post-process results (log/report generation) only when requested.
    if exec_settings.is_rebot_needed():
        result_output, exec_settings = \
            exec_settings.get_rebot_datasource_and_settings()
        ResultWriter(exec_settings).write_robot_results(result_output)
    LOGGER.close()
    return test_suite
# Example 4
def run(*datasources, **options):
    """Execute the given Robot Framework data sources with given options.

    Positional arguments are paths to test data files or directories,
    exactly as accepted by pybot/jybot on the command line.  Keyword
    arguments map to the long command line options with the hyphens
    removed.

    Examples:
    run('/path/to/tests.html')
    run('/path/to/tests.html', '/path/to/tests2.html', log='mylog.html')

    Equivalent command line usage:
    pybot /path/to/tests.html
    pybot --log mylog.html /path/to/tests.html /path/to/tests2.html
    """
    STOP_SIGNAL_MONITOR.start()
    run_settings = RobotSettings(options)
    LOGGER.register_console_logger(run_settings['MonitorWidth'],
                                   run_settings['MonitorColors'])
    xml_output = Output(run_settings)
    init_global_variables(run_settings)
    root_suite = TestSuite(datasources, run_settings)
    root_suite.run(xml_output)
    LOGGER.info("Tests execution ended. Statistics:\n%s"
                % root_suite.get_stat_message())
    test_output = RobotTestOutput(root_suite, run_settings)
    xml_output.close(root_suite)
    # Post-process results into logs/reports only when requested.
    if run_settings.is_rebot_needed():
        datasources, run_settings = \
            run_settings.get_rebot_datasources_and_settings()
        if run_settings['SplitOutputs'] > 0:
            test_output = SplitIndexTestOutput(root_suite, datasources[0],
                                               run_settings)
        else:
            test_output = RebotTestOutput(datasources, run_settings)
        test_output.serialize(run_settings)
    LOGGER.close()
    return root_suite
# Example 5
class AstroHTM(object):
    """Hierarchical temporal memory (HTM) anomaly detection on astronomy data.

    Reads spectrum data from a FITS file (via the project's ``Data`` class),
    feeds each record through an HTM model built by ``ModelFactory``, and
    writes per-record anomaly scores to a CSV file (via the project's
    ``Output`` class).
    """
    _LOGGER = logging.getLogger(__name__)

    # Earlier input files, kept for reference:
    #_SOURCE_FILE = './srcB_3to40_cl_barycorr_binned_multiD.fits'
    #_SOURCE_FILE = 'nu80002092008A01_x2_bary_binned10.fits'
    #_SOURCE_FILE = 'ni1103010157_0mpu7_cl_binned10.fits'

    # Multiplier applied to the raw anomaly score when reporting it.
    _ANOMALY_SCALE_FACTOR = 300
    # Number of records whose anomaly score exceeded the threshold so far.
    anomaly_count = 0
    # Becomes True once _setRandomEncoderResolution() has run.
    encoder_resolution_set = False

    def __init__(self,
                 source_file,
                 min_var,
                 headers,
                 model_params,
                 output_path,
                 select_cols=False,
                 threshold=0.5):
        """Store configuration and load the input data.

        Parameters:
        ------------
        @param source_file
            The fits file name containing the data to run anomaly detection
            on.  Do not include '.fits' at the end of the filename, it is
            implied.

        @param min_var (int)
            The minimum variance a spectrum column must have, else it is
            dropped (only used when select_cols is True).

        @param headers
            Column names for the input data; passed straight through to
            ``Data``.

        @param model_params (dictionary)
            The dictionary of parameters for the HTM model to be used.

        @param output_path (string)
            The filename to which the output will be written (.csv expected).

        @param select_cols (boolean)
            True if columns should be removed for having low variance,
            False otherwise.  Default value is False.

        @param threshold (float from 0 to 1)
            Determines how high an anomaly score must be in order to
            register as an anomaly.  Default value is 0.5.
        """
        self._SOURCE_FILE = source_file
        self._MIN_VARIANCE = min_var
        self._SELECT_COLS = select_cols
        self._ANOMALY_THRESHOLD = threshold
        self.data = Data(self._SOURCE_FILE, headers)
        print len(headers), "headers given originally"
        #self.model = self.createModel()
        # Deep copy so later encoder-resolution tweaks do not mutate the
        # caller's dictionary.
        self.model_params = copy.deepcopy(model_params)
        self._OUTPUT_PATH = output_path
        self.data.set_input_stats()

    def get_anomaly_count(self):
        """Return the number of anomalies detected so far."""
        return self.anomaly_count

    def _setRandomEncoderResolution(self, minResolution=0.001):
        """Derive resolutions for the RandomDistributed encoders in place.

        For every data column whose encoder is a
        RandomDistributedScalarEncoder with a "numBuckets" entry, compute
        "resolution" from the observed input range (padded by 20% on each
        side) divided by the bucket count, clamped to at least
        minResolution.  Encoders for fields no longer present in
        self.data.headers are removed.  Modifies self.model_params in place.
        """
        # Skip the first header -- presumably the timestamp column, which
        # is handled separately (see generate_model_input); confirm.
        fields = self.data.headers[1:]

        for i, field in enumerate(fields):
            encoder = self.model_params["modelParams"]["sensorParams"][
                "encoders"][field]

            if encoder[
                    "type"] == "RandomDistributedScalarEncoder" and "numBuckets" in encoder:
                # Pad the observed min/max range by 20% on each side.
                rangePadding = abs(self.data._INPUT_MAX[i] -
                                   self.data._INPUT_MIN[i]) * 0.2
                minValue = self.data._INPUT_MIN[i] - rangePadding
                maxValue = self.data._INPUT_MAX[i] + rangePadding
                # numBuckets is popped: the encoder takes resolution instead.
                resolution = max(minResolution, (maxValue - minValue) /
                                 encoder.pop("numBuckets"))
                encoder["resolution"] = resolution
                #print "RESOLUTION:", resolution

            self.model_params['modelParams']['sensorParams']['encoders'][
                field] = encoder

        self.encoder_resolution_set = True

        # Drop encoders for columns removed from the data.
        # NOTE(review): pops while iterating .keys() -- safe on Python 2
        # (keys() returns a list) but would break on Python 3.
        for i in self.model_params['modelParams']['sensorParams'][
                'encoders'].keys():
            if i not in self.data.headers:
                self.model_params['modelParams']['sensorParams'][
                    'encoders'].pop(i)

    def createModel(self):
        """Finalize encoder resolutions and build the HTM model."""
        self._setRandomEncoderResolution()
        return ModelFactory.create(self.model_params)

    def setup_data(self):
        """Optionally drop low-variance columns from the data (once)."""
        #print("SPECTRUM BEFORE PROCESSING: ", self.data.spectrum)

        if self._SELECT_COLS:
            self.data.select_cols(self._MIN_VARIANCE, self.model_params)
            # Only select columns once per instance.
            self._SELECT_COLS = False

        #self.data.replace_bad_intervals()

        #print("SPECTRUM AFTER PROCESSING: ", self.data.spectrum)

    def setup_output(self):
        """Create the Output object and write the CSV header line."""
        self.output = Output(self._OUTPUT_PATH)
        self.output.write([
            "timestamp",
            str(self.data.headers[1]), "scaled_score", "anomaly_score"
        ])

    def generate_model_input(self, index):
        """Build the index-th input record for the model into self.modelInput.

        Parameters:
        ------------
        @param index (int)
            The row index to create the input from.

        Side effect: self.modelInput maps each header to a float, with the
        raw numeric timestamp kept under 'float' and 'timestamp' replaced
        by a datetime object.
        """
        record = self.data.generate_record(index)
        self.modelInput = dict(zip(self.data.headers, record))

        for b in self.data.headers:
            self.modelInput[b] = float(self.modelInput[b])

        # Preserve the numeric timestamp under 'float'; the model receives
        # a datetime under 'timestamp'.
        self.modelInput["float"] = self.modelInput["timestamp"]
        self.modelInput["timestamp"] = datetime.datetime.fromtimestamp(
            self.modelInput["timestamp"])

    def run_model(self):
        """Run the model on self.modelInput; return (anomalyScore, scaledScore)."""
        result = self.model.run(self.modelInput)
        anomalyScore = result.inferences['anomalyScore']
        scaledScore = anomalyScore * self._ANOMALY_SCALE_FACTOR
        return anomalyScore, scaledScore

    def output_results(self, anomalyScore, scaledScore):
        """Write one line of results for the current datapoint.

        Increments anomaly_count and logs when anomalyScore exceeds the
        configured threshold.

        Parameters:
        ------------
        @param anomalyScore (float)
            The score generated by the HTM model for the current modelInput.

        @param scaledScore (float)
            The anomalyScore multiplied by _ANOMALY_SCALE_FACTOR.
        """
        if anomalyScore > self._ANOMALY_THRESHOLD:
            self.anomaly_count = self.anomaly_count + 1
            self._LOGGER.info("Anomaly detected at [%s]. Anomaly score: %f.",
                              self.modelInput["timestamp"], anomalyScore)
        self.output.write([
            self.modelInput["float"], self.modelInput[self.data.headers[1]],
            scaledScore,
            "%.3f" % anomalyScore
        ])

    def runAstroAnomaly(self):
        """Process data, set up output, then run the model over every row."""
        print "...Running with min var of: ", self._MIN_VARIANCE
        self.setup_output()
        self.setup_data()

        self.model = self.createModel()
        self.model.enableInference({'predictedField': self.data.headers[1]
                                    })  # doesn't matter for anomaly detection

        for i in tqdm.tqdm(range(0, self.data.data_size, 1),
                           desc='% Complete'):
            self.generate_model_input(i)
            anomalyScore, scaledScore = self.run_model()
            self.output_results(anomalyScore, scaledScore)

        print "Anomaly Scores have been written to", self._OUTPUT_PATH
        print self.data.headers
        self.output.close()