Beispiel #1
0
    def __init__(self, jsrc_path):
        """
        Start (or attach to) the JVM, silence Java stdout and run the
        reeb_graph tools once with the default parameters.

        :param jsrc_path: (str) Path that contains compiled reeb_graph java project
                                (https://github.com/dbespalov/reeb_graph)
        """
        import tempfile  # local import: only needed for the scratch directory

        self.jsrc_path = jsrc_path

        if not jpype.isJVMStarted():
            jpype.startJVM(classpath=[jsrc_path], convertStrings=True)
        elif not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()

        # These imports are activated by jpype after starting the JVM
        from java.lang import System
        from java.io import PrintStream, File
        # Disable java output. os.devnull is 'nul' on Windows and '/dev/null'
        # on unix, so no platform-specific literal is needed.
        System.setOut(PrintStream(File(os.devnull)))

        self.erg = jpype.JClass('ExtractReebGraph')()
        self.crg = jpype.JClass('CompareReebGraph')()

        # Set defaults
        self.params = ['4000', '0.005', str(2**7), '0.5']

        # the reeb project tries to save a file in the working directory ->
        # redirect to the temp directory briefly
        original_cwd = Path.cwd()
        os.chdir(tempfile.gettempdir())
        try:
            self.erg.main(self.params[:3])
            self.crg.main(self.params)
            try:
                (Path.cwd() / 'log_{}_{}_{}_{}'.format(*self.params)).unlink()
            except FileNotFoundError:
                pass
        finally:
            # Always return to the caller's working directory, even when one
            # of the Java entry points raises.
            os.chdir(str(original_cwd))
def __check_update_require_domain_restart(model_context):
    exit_code = 0
    try:
        # First we enable the stdout again and then redirect the stdoout to a string output stream
        # call isRestartRequired to get the output, capture the string and then silence wlst output again
        #

        __wlst_helper.enable_stdout()
        sostream = StringOutputStream()
        System.setOut(PrintStream(sostream))
        restart_required = __wlst_helper.is_restart_required()
        is_restartreq_output = sostream.get_string()
        __wlst_helper.silence()
        if model_context.is_rollback_if_restart_required(
        ) and restart_required:
            __wlst_helper.cancel_edit()
            __logger.severe('WLSDPLY_09015', is_restartreq_output)
            exit_code = CommandLineArgUtil.PROG_ROLLBACK_IF_RESTART_EXIT_CODE
        else:
            __wlst_helper.save()
            __wlst_helper.activate()
            if restart_required:
                exit_code = CommandLineArgUtil.PROG_RESTART_REQUIRED
    except BundleAwareException, ex:
        __release_edit_session_and_disconnect()
        raise ex
 def _generate(self, title, *args):
     """
     Click for a popup menu in the middle of the tab named ``title``.

     :param title: tab title passed to ``self.widget.indexOfTab``
     :param args: accepted but not used here
     """
     # Java stdout is muted while the tab is located and the operator is
     # built (presumably to hide Jemmy's console output - TODO confirm).
     System.setOut(PrintStream(NullOutputStream()))
     try:
         index = self.widget.indexOfTab(title)
         rect = self.widget.getBoundsAt(index)
         operator = jemmy.operators.ComponentOperator(self.widget.widget)
     finally:
         # Restore stdout even if the lookup or the construction fails.
         System.setOut(out_orig)
     operator.clickForPopup(rect.x + rect.width/2, rect.y + rect.height/2)
 def CommandEntered(self, event):
     """
     Handle a command typed into the console's input field.

     ``exit`` restores the saved Java System.out/System.err and disposes the
     frame. Anything else is echoed through ``self.printStream`` and then
     executed, first by ``self.interpreter`` and then with ``exec`` in this
     script's own interpreter.

     :param event: event whose ``source.text`` holds the entered command
     """
     if event.source.text == 'exit':
         # let's kill the frame
         System.setOut(self.__outOriginal)
         System.setErr(self.__errOriginal)
         self.dispose()
     else:
         # echo the input to the text area using the printstream
         s = 'in : ' + event.source.text
         self.printStream.println(s)

         # try the embedded interp (the getLocals was just to see if I could interact with it)
         a = self.interpreter.getLocals()
         self.printWriter.println(a)
         # The command lives in ``source.text`` (as read above); the previous
         # ``source.txt`` lookup does not match any attribute used elsewhere
         # in this method.
         self.interpreter.exec(event.source.text)

         # try the main interp (the getLocals was just to see if I could interact with it)
         # NOTE(review): the bare getLocals() is not defined in this block -
         # confirm it exists at module level.
         a = getLocals()
         self.printWriter.println(a)
         # NOTE(review): this executes whatever the user typed; acceptable
         # only because this is an interactive console.
         exec(event.source.text)

         # set the input text blank and give it focus back
         self.inputField.setText('')
         self.inputField.requestFocus()
Beispiel #5
0
def compile(wd, args):
    """
    Run ``Tom.exec(config + args)`` and capture what it writes to Java
    System.out / System.err.

    :param wd: directory stored in the JVM ``user.dir`` system property
    :param args: arguments appended to the module-level ``config``
    :return: ``str()`` of the tuple (captured stdout, captured stderr)
    """
    print("changing working directory to %s" % wd)
    System.setProperty("user.dir", wd)
    print(args)
    outstream = ByteArrayOutputStream()
    errstream = ByteArrayOutputStream()
    saved_out = System.out
    saved_err = System.err
    System.setOut(PrintStream(outstream))
    System.setErr(PrintStream(errstream))
    try:
        try:
            Tom.exec(config + args)
        except:
            # Deliberately best-effort: a failing compile is reported through
            # the captured streams rather than by raising.
            pass
    finally:
        # Put the JVM streams back so later Java output is not written into
        # buffers nobody reads any more.
        System.setOut(saved_out)
        System.setErr(saved_err)
    return str((outstream.toString(), errstream.toString()))
Beispiel #6
0
 def __init__(self, *args, **kwargs):
     """
     Initialise the kernel, start the JVM, capture Java stdout/stderr in
     ``self.baos`` and create a VTL session in ``self.VTLSession``.
     """
     super(VTLKernel, self).__init__(*args, **kwargs)
     startJVM()
     # The java.* imports are placed after startJVM() on purpose.
     from java.lang import System
     from java.io import ByteArrayOutputStream, PrintStream
     self.baos = ByteArrayOutputStream()
     capture = PrintStream(self.baos)
     # Both Java streams share the same in-memory buffer.
     for redirect in (System.setOut, System.setErr):
         redirect(capture)
     config_manager = JPackage('it').bancaditalia.oss.vtl.config.ConfigurationManager
     self.VTLSession = config_manager.getDefault().createSession()
 def __init__(self):
     """
     Build the console frame and route Java System.out/System.err into
     ``self.printStream``.
     """
     JFrame.__init__(self, 'MagicDraw Jython Console')
     # Remember the JVM-wide streams (not the PythonInterpreter's) so they can
     # be put back later.
     self.__outOriginal, self.__errOriginal = System.out, System.err
     # printStream is what updates the text area.
     # NOTE(review): it is read here before CreateComponents() runs - confirm
     # it is already available at this point.
     for redirect in (System.setOut, System.setErr):
         redirect(self.printStream)
     self.CreateComponents()
     self.setVisible(True)
     self.requestFocus()
     self.inputField.requestFocus()
Beispiel #8
0
def astral_tree(args):
    """
    Run ASTRAL (``phylonet.coalescent.CommandLine``) inside a fresh JVM.

    :param args: tuple ``(astral, input_file, output_file)``; ``astral`` is
                 used as the Java class path, the other two are passed to
                 ASTRAL as ``-i`` and ``-o``
    """
    import os  # local import: only needed for the portable null device

    astral, input_file, output_file = args
    print('ASTRAL processing %s has begun' % input_file)
    jpype.startJVM(jpype.getDefaultJVMPath(),
                   '-Djava.class.path=%s' % astral,
                   convertStrings=False)
    try:
        from java.lang import System
        from java.io import PrintStream, File
        # os.devnull instead of a literal '/dev/null' so this also works on
        # platforms without that path.
        System.setOut(PrintStream(File(os.devnull)))
        System.setErr(PrintStream(File(os.devnull)))
        jpype.imports.registerDomain('phylonet')
        from phylonet.coalescent import CommandLine
        CommandLine.main(['-i', input_file, '-o', output_file])
    finally:
        # Shut the JVM down even when ASTRAL fails.
        jpype.shutdownJVM()
 def wrapper(*args):
     """
     Call ``func(*args)`` with Python and Java stdout/stderr silenced.

     :return: whatever ``func`` returns
     """
     sysout = sys.stdout
     syserr = sys.stderr
     javaOut = System.out
     javaErr = System.err
     try:
         sys.stdout = DevNull()
         sys.stderr = DevNull()
         System.setOut(DevNull())
         System.setErr(DevNull())
         return func(*args)
     finally:
         # Restore all four streams even if func raises; otherwise the
         # process would stay silent afterwards.
         sys.stdout = sysout
         sys.stderr = syserr
         System.setOut(javaOut)
         System.setErr(javaErr)
Beispiel #10
0
def main():
    """
    Start the editor: install the exception stack window, open the main
    frame and prepare its console interpreter.
    """
    # The stack window is both the uncaught-exception handler and the target
    # of Java stdout/stderr.
    stack_window = StackWindow("Show Exception Stack", 600, 400)
    Thread.setDefaultUncaughtExceptionHandler(stack_window)
    for redirect in (System.setOut, System.setErr):
        redirect(stack_window.printStream)

    look_and_feel = swing.UIManager.getSystemLookAndFeelClassName()
    swing.UIManager.setLookAndFeel(look_and_feel)

    frm = FrmMain(current_folder)
    frm.visible = True

    # Figures created through plotjy are docked into the main frame.
    plotjy.jyplot.figure_parent = frm.getFigureDock()

    # Give the embedded console the same module search path as this script.
    interp = frm.getConsoleDockable().getInterpreter()
    interp.getSystemState().path = sys.path
    interp.exec('import plotjy')
Beispiel #11
0
    def _set_interactive(self, interactive):
        """
        Switch the JVM output streams between the original ones (interactive)
        and the syslog-backed silent ones (non-interactive), then pass the
        setting on to the connection's jcli if there is one.

        :param interactive: truthy to restore the original streams
        """
        silence = not interactive
        if silence:
            debug("disable default JVM output streams")
            # The silent streams are created once and reused afterwards.
            if self.silent_streams is None:
                self.silent_streams = streams(SyslogOutputStream(), SyslogOutputStream(SyslogOutputStream.ERR))
        else:
            debug("enable default JVM output streams")

        # Flush whatever is pending on the current streams before swapping.
        System.out.flush()
        System.err.flush()

        if silence:
            System.setOut(PrintStream(self.silent_streams.out, True))
            System.setErr(PrintStream(self.silent_streams.err, True))
        else:
            System.setOut(self.original_streams.out)
            System.setErr(self.original_streams.err)

        # FIXME broadcast configuration change to connection managers

        connection = self.connection
        if connection is not None and connection.jcli is not None:
            connection.jcli.set_silent(silence)
Beispiel #12
0
 def alt_handle(self):
     """
     Read one line of MARC XML from ``self.rfile``, run the Saxon query on
     it and write the captured query output to ``self.wfile``.

     The XML is handed to Saxon through a temporary file. Any failure is
     reported to the client as an "Error processing MARC XML" message.
     """
     marc_path = None
     java_out = None
     try:
         raw_xml = self.rfile.readline().strip()
         marc_xmlfile = NamedTemporaryFile(delete=False)
         marc_path = marc_xmlfile.name
         marc_xmlfile.write(raw_xml)
         marc_xmlfile.close()
         base_uri = INFO.get('base_uri', 'http://catalog/')
         args = [INFO.get('saxon_xqy'),
                 'marcxmluri={}'.format(
                     os.path.normpath(marc_path).replace("\\", "/")),
                 'baseuri={}'.format(base_uri),
                 'serialization=rdfxml']
         query = saxon.Query()
         output_stream = ByteArrayOutputStream()
         # The query writes its result to Java stdout, so capture that.
         java_out = System.out
         System.setOut(PrintStream(output_stream))
         query.main(args)
         self.wfile.write(output_stream.toString().encode('ascii',
                                                          errors='ignore'))
     except:
         # Broad on purpose: the client gets an error line instead of a
         # dropped connection.
         self.wfile.write("Error processing MARC XML:\n\t{}".format(sys.exc_info()[0]))
     finally:
         # Undo the stdout redirection and remove the temp file on every
         # path, not only on success.
         if java_out is not None:
             System.setOut(java_out)
         if marc_path is not None:
             try:
                 os.remove(marc_path)
             except OSError:
                 pass
Beispiel #13
0
def connectMonkey():
    """
    Connect to the device through MonkeyRunner, store it with setDevice()
    and return it.

    Java stdout/stderr are redirected into in-memory buffers first. When
    getpid() returns None and something was written to the stdout buffer,
    the connection is attempted a second time.
    """
    device_id = getId()
    # The value is overwritten below; the call itself is kept as it was.
    device = getDevice()
    from java.io import File, PrintStream, ByteArrayOutputStream
    from java.lang import System
    captured_out = ByteArrayOutputStream(100)
    captured_err = ByteArrayOutputStream(100)
    System.setOut(PrintStream(captured_out))
    System.setErr(PrintStream(captured_err))

    def _wait_for_device():
        # Without an id wait for any device, otherwise for that id (20 is
        # the first argument passed to waitForConnection).
        if device_id is None:
            return MonkeyRunner.waitForConnection()
        return MonkeyRunner.waitForConnection(20, device_id)

    # Helps start adb if not started
    # pid of monkeyrunner only exists starting sdk 2.3
    pid = getpid()
    device = _wait_for_device()

    if pid is None and captured_out.size() > 0:
        # call connect again
        # see defect http://code.google.com/p/android/issues/detail?id=16722
        device = _wait_for_device()
    print(device)

    setDevice(device)
    return device
Beispiel #14
0
def online_check_save_activate(model_context):
    """
    For online update and deploy, check if restart is required, then cancel or save and activate.
    :param model_context: used to perform checks
    :return: the exit code for the tool
    :raises BundleAwareException: if an error occurs during the process
    """
    _method_name = 'online_check_save_activate'
    exit_code = 0

    try:
        # First we enable the stdout again and then redirect the stdoout to a string output stream
        # call isRestartRequired to get the output, capture the string and then silence wlst output again

        _wlst_helper.enable_stdout()
        sostream = StringOutputStream()
        System.setOut(PrintStream(sostream))
        restart_required = _wlst_helper.is_restart_required()
        is_restartreq_output = sostream.get_string()
        _wlst_helper.silence()
        if model_context.is_cancel_changes_if_restart_required() and restart_required:
            _wlst_helper.cancel_edit()
            _logger.warning('WLSDPLY_09015', is_restartreq_output)
            exit_code = CommandLineArgUtil.PROG_CANCEL_CHANGES_IF_RESTART_EXIT_CODE
            list_non_dynamic_changes(model_context, is_restartreq_output)
        else:
            _wlst_helper.save()
            _wlst_helper.activate(model_context.get_model_config().get_activate_timeout())
            if restart_required:
                exit_code = CommandLineArgUtil.PROG_RESTART_REQUIRED
                list_non_dynamic_changes(model_context, is_restartreq_output)
                exit_code = list_restarts(model_context, exit_code)

    except BundleAwareException, ex:
        release_edit_session_and_disconnect()
        raise ex
Beispiel #15
0
def main():
    """
    Command-line entry point: run a Bayesian optimisation of one PTSP
    algorithm's parameters.

    Earlier optimisation logs found in the output directory are loaded into
    the optimizer; without any, one known parameter point is probed and five
    initial points are requested. Java stdout is appended to ``cout.txt`` in
    the output directory.
    """
    # Parse arguments
    import argparse
    parser = argparse.ArgumentParser(description='Runs a bayesian optimisation for some of the algorithms defined in the PTSP framework')
    parser.add_argument(
        '--algorithm',
        default="S-MCTS",
        choices=["QD-MCTS", "S-MCTS", "MS-MCTS", "VanillaGA", "VanillaMCTS"],
        help='The algorithm that should be optimized')
    parser.add_argument(
        '--outputDir',
        default="./optimizationResults",
        help='The output directory for all data generated by the optimization')
    parser.add_argument(
        "--ptspPath",
        default="./ptsp.jar",
        help="The path to the .jar file containing the PTSP framework")
    parser.add_argument(
        "--iters",
        type=int,
        default="10",
        help="Number of parameter-points to test by the bayesian optimization")
    args = parser.parse_args()
    algorithm = args.algorithm
    output_path = f"{args.outputDir}/{algorithm}"
    args.outputPath = output_path

    # Find all previous logs for this optimization
    previous_logs = glob.glob(f"{output_path}/optimizationLogs*.json")
    csv_logs = glob.glob(f"{output_path}/*.csv")
    # The seed changes with the number of earlier runs.
    seed = len(previous_logs) * 11

    # Launch the JVM
    jpype.startJVM()
    jpype.addClassPath(args.ptspPath)
    import framework.Optimization as optim

    # Move java output into a file (opened in append mode)
    from java.lang import System
    from java.io import PrintStream, FileOutputStream
    pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
    System.setOut(PrintStream(FileOutputStream(f"{output_path}/cout.txt", True)))

    # Algorithm specific data. S-MCTS and MS-MCTS use the same search space.
    subgoal_bounds = {
        "cellSize": (5, 30),  # Size of a cell in the subgoal grid (aka distance between subgoals)
        "er": (0.01, 10),  # Exploration rate high-level search
        "steps": (300, 600),  # Number of steps for low-level search
        "rd": (10, 30),  # rolloutDepth
    }
    bounds = {
        "QD-MCTS": {
            "lowER": (0.01, 10),  # Exploration rate low-level search
            "highER": (0.01, 10),  # Exploration rate high-level search
            "steps": (300, 600),  # Number of steps for low-level search
            "rd": (10, 30),  # rolloutDepth
        },
        "S-MCTS": dict(subgoal_bounds),
        "MS-MCTS": dict(subgoal_bounds),
        "VanillaGA": {
            "gl": (10, 30),  # How many base-actions does a genome contain
            "ps": (1, 100),  # How many genomes in one population
            "mr": (0.1, 0.9),  # Probability that an action is mutated
        },
        "VanillaMCTS": {
            "er": (0.01, 10),  # Exploration rate
            "rd": (10, 30),  # RolloutDepth
        },
    }

    # Objective per algorithm; integer-valued parameters are rounded before
    # they are handed to the Java side.
    funcs = {
        "QD-MCTS": lambda lowER, highER, steps, rd: execSafe(
            optim.runQD_MCTS, lowER, highER, round(steps), round(rd)),
        "S-MCTS": lambda cellSize, er, steps, rd: execSafe(
            optim.runSMCTS, cellSize, er, round(steps), round(rd)),
        "MS-MCTS": lambda cellSize, er, steps, rd: execSafe(
            optim.runMS_MCTS, cellSize, er, round(steps), round(rd)),
        "VanillaGA": lambda gl, ps, mr: execSafe(
            optim.runVanillaGA, round(gl), round(ps), mr),
        "VanillaMCTS": lambda er, rd: execSafe(
            optim.runVanillaMCTS, er, round(rd)),
    }

    # Probe points for each algorithm, only one which I've used previously
    subgoal_probe = {"cellSize": 20, "er": 4, "steps": 400, "rd": 25}
    probes = {
        "QD-MCTS": {"lowER": math.sqrt(2), "highER": 4, "steps": 400, "rd": 25},
        "S-MCTS": dict(subgoal_probe),
        "MS-MCTS": dict(subgoal_probe),
        "VanillaGA": {"gl": 20, "ps": 50, "mr": 1. / 20},
        "VanillaMCTS": {"er": math.sqrt(2), "rd": 12},
    }

    # Initialize optimization
    optim.setupRun(seed)  # Different seed for each run
    optim.RUN_COUNTER = len(csv_logs)  # Make sure java logs into a new csv file
    optim.NUM_TRIALS = 10
    optim.OutputDir = output_path
    optim.m_mapNames = glob.glob("./maps/**/*.map", recursive=True)

    optimizer = BayesianOptimization(
        f=funcs[algorithm],
        pbounds=bounds[algorithm],
        random_state=seed,  # Change behaviour for each run
    )
    print(f"Optimizing {algorithm} with bounds:")
    print(bounds[algorithm])

    # Probe if necessary
    if not previous_logs:
        print("Found no previous logs... Probing to improve results:")
        print(probes[algorithm])
        optimizer.probe(params=probes[algorithm], lazy=True)
        init_points = 5
    else:  # If we found logs, load them
        init_points = 0
        print("Reading previous logs into optimizer...")
        load_logs(optimizer, logs=previous_logs)
        for log in previous_logs:
            print(f"Successfully loaded {log}")

    # Subscribed only after the old logs are loaded, as before.
    logger = JSONLogger(path=f"{output_path}/optimizationLogs{len(previous_logs) + 1}.json")
    optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

    # Run optimization
    print(f"Starting optimisation for {algorithm}...")
    optimizer.maximize(init_points=init_points, n_iter=args.iters)
    print("Finished optimisation")
    print(optimizer.max)
 def _generate(self, argumentString):
     """
     Open the popup menu on the table cell described by ``argumentString``.

     :param argumentString: cell description resolved to view indices by
                            the table's TableIndexer
     """
     row, column = TableIndexer.getIndexer(self.widget.widget).getViewCellIndices(argumentString)
     # Java stdout is muted only while the operator is constructed.
     System.setOut(PrintStream(NullOutputStream()))
     try:
         operator = jemmy.operators.JTableOperator(self.widget.widget)
     finally:
         # Restore stdout even if the constructor raises.
         System.setOut(out_orig)
     operator.callPopupOnCell(row, column)
 def enable_output(self):
     """Point Java System.out back at the stream kept in ``self._java_stdout``."""
     saved_stream = self._java_stdout
     JavaSystem.setOut(saved_stream)
 def selectFromPopupMenu(self):
     """Push the wrapped menu item with Java stdout muted for the duration."""
     System.setOut(PrintStream(NullOutputStream()))
     try:
         operator = jemmy.operators.JMenuItemOperator(self.widget.widget)
         operator.push()
     finally:
         # Restore stdout even if the operator cannot be built or pushed.
         System.setOut(out_orig)
 def _generate(self, *args):
     """
     Click for a popup menu on the wrapped component.

     :param args: accepted but not used here
     """
     # Java stdout is muted only while the operator is constructed.
     System.setOut(PrintStream(NullOutputStream()))
     try:
         operator = jemmy.operators.ComponentOperator(self.widget.widget)
     finally:
         # Restore stdout even if the constructor raises.
         System.setOut(out_orig)
     operator.clickForPopup()
Beispiel #20
0
    def process(self):
        """
        run DMR, creating an output file divided by time

        Steps, in order:

        1. Read the options from ``self.named_args`` (or fall back to the
           defaults) and build the MALLET instances.
        2. Unless ``self.dry_run`` is set, train a ``DMRTopicModel`` with
           Java stdout redirected into the progress file, then write the
           parameter and state files.
        3. Parse the parameter, progress and state files into
           ``self.topic_features``, ``self.alphas``, ``self.topic_words``
           and ``self.doc_topics``.
        4. Derive topic labels and allocation ratios and pass them to
           ``self.write_html``.
        """

        if self.named_args is not None:
            self.tfidf = self.named_args['tfidf']
            self.min_df = int(self.named_args['min_df'])
            self.stemming = self.named_args['stemming']
            self.topics = int(self.named_args['topics'])
            self.lang = self.named_args['lang']
        else:
            self.tfidf = True
            self.min_df = 5
            self.topics = 50
            self.stemming = True
            self.lang = 'en'

        self._setup_mallet_instances(tfidf=self.tfidf, stemming=self.stemming)

        # NOTE: this changes the process-wide working directory and does not
        # change it back in this method.
        os.chdir(self.mallet_out_dir)

        # from cc.mallet.topics.DMRTopicModel import main as DMRTopicModel
        from cc.mallet.topics import DMRTopicModel
        # NOTE(review): process_args is not used below (leftover from the
        # commented-out main() call).
        process_args = [self.instance_file, str(self.topics)]
        logging.info('begin DMR')

        start_time = time.time()
        self.parameter_file = os.path.join(self.mallet_out_dir,
                                           'dmr.parameters')
        self.state_file = os.path.join(self.mallet_out_dir, 'dmr.state.gz')
        if not self.dry_run:
            # DMRTopicModel(process_args)
            from java.io import File, PrintStream, FileOutputStream
            from java.lang import System

            # Close the Python handle, then send Java stdout to the same
            # file; it is read back further down to collect the alphas.
            # NOTE(review): System.out is not restored in this method.
            self.progress_file.close()
            progress_file = File(self.progress_filename)
            System.setOut(PrintStream(FileOutputStream(progress_file)))

            from cc.mallet.types import InstanceList
            training = InstanceList.load(File(self.instance_file))
            numTopics = int(self.topics)
            lda = DMRTopicModel(numTopics)
            lda.setOptimizeInterval(100)
            lda.setTopicDisplay(100, 10)
            lda.addInstances(training)
            lda.estimate()
            lda.writeParameters(File(self.parameter_file))
            lda.printState(File(self.state_file))

        logging.info('DMR complete in ' + str(time.time() - start_time) +
                     ' seconds')

        # Parameter file: a 'FEATURES FOR CLASS topicN' line switches the
        # current topic; on every other line the second space-separated field
        # is taken as the feature name and the third as its value.
        self.topic_features = {}
        with codecs.open(self.parameter_file, 'r', encoding='utf-8') as f:
            topic = 0
            for line in f:
                new_topic = re.match('FEATURES FOR CLASS topic([0-9]+)', line)
                if new_topic is not None:
                    topic = int(new_topic.group(1))
                else:
                    if not topic in self.topic_features:
                        self.topic_features[topic] = {}
                    this_line = line.split(' ')
                    feature = this_line[1]
                    self.topic_features[topic][feature] = \
                        float(this_line[2])

        # Progress file: lines of the form '<int>\t<number>...' give the
        # alpha for a topic; the third tab-separated field is parsed but not
        # used.
        # NOTE(review): file() is the Python 2 builtin and the handle is not
        # closed in this method.
        self.progress_file = file(self.progress_filename, 'r')
        self.progress_file.seek(0, os.SEEK_SET)
        self.alphas = {}
        for line in self.progress_file:
            if re.match('[0-9]+\t[0-9.]+', line) is not None:
                this_line = line.split('\t')
                topic = int(this_line[0])
                alpha = float(this_line[1])
                tokens = int(this_line[2])

                self.alphas[topic] = alpha

        self.alpha_sum = sum(self.alphas.values())

        self.topic_words = {}
        self.doc_topics = {}

        # State file (gzip): the first line is skipped, then each line is one
        # token with space-separated fields; field 0 is read as the document,
        # 2 as the position, 4 as the word and 5 as the topic.
        with gzip.open(self.state_file, 'rb') as state_file:
            state_file.next()
            for line in state_file:
                this_line = line.split(' ')
                topic = int(this_line[5])
                word = this_line[4]
                doc = int(this_line[0])
                position = int(this_line[2])

                # count tokens per (document, topic)
                if not doc in self.doc_topics:
                    self.doc_topics[doc] = {}
                if not topic in self.doc_topics[doc]:
                    self.doc_topics[doc][topic] = 0
                self.doc_topics[doc][topic] += 1

                # count tokens per (topic, word)
                if not topic in self.topic_words:
                    self.topic_words[topic] = {}
                if not word in self.topic_words[topic]:
                    self.topic_words[topic][word] = 0
                self.topic_words[topic][word] += 1

        # total_tokens = float(sum([sum(y.values()) for x, y in self.topic_words.iteritems()]))

        # Turn the per-topic word counts into proportions within the topic.
        for topic in self.topic_words.keys():
            total = float(sum(self.topic_words[topic].values()))
            for k in self.topic_words[topic].keys():
                self.topic_words[topic][k] /= total

        # Keep the first top_N words that self.argsort(..., reverse=True)
        # returns for each topic.
        top_N = 20
        top_topic_words = dict(
            (x,
             dict((word, y[word])
                  for word in self.argsort(y, reverse=True)[:top_N]))
            for (x, y) in self.topic_words.iteritems())
        # NOTE(review): wordProbs is a list in dict-iteration order but is
        # indexed by topic id further down - confirm the two always line up.
        wordProbs = [[{
            'text': word,
            'prob': prob
        } for (word, prob) in y.iteritems()]
                     for (x, y) in top_topic_words.iteritems()]

        DEFAULT_DOC_PROPORTIONS = [
            0.01,
            0.02,
            0.05,
            0.1,
            0.2,
            0.3,
            0.5,
        ]
        # For every topic, count the documents whose smoothed share of that
        # topic reaches each threshold. The thresholds are ascending, so the
        # inner loop stops at the first one the share falls below.
        numDocumentsAtProportions = dict(
            (topic, dict((k, 0.0) for k in DEFAULT_DOC_PROPORTIONS))
            for topic in self.topic_words.keys())
        for (doc, topics) in self.doc_topics.iteritems():
            doc_length = sum(topics.values())
            for (topic, count) in topics.iteritems():
                proportion = (self.alphas[topic] + count) \
                    / (self.alpha_sum + doc_length)
                for min_proportion in DEFAULT_DOC_PROPORTIONS:
                    if proportion < min_proportion:
                        break
                    numDocumentsAtProportions[topic][min_proportion] += \
                        1

        # Allocation ratio: documents at >= 0.5 divided by documents at
        # >= 0.02; topics with no document at 0.02 are left out.
        allocationRatios = dict(
            (topic, proportions[0.5] / proportions[0.02])
            for (topic, proportions) in numDocumentsAtProportions.iteritems()
            if proportions[0.02] > 0.0)

        # The label of a topic is made of its first three words from argsort.
        labels = dict((topic, {
            'label': self.argsort(words, reverse=True)[:3],
            'fulltopic': wordProbs[topic],
            'allocation_ratio': allocationRatios.get(topic, 0)
        }) for (topic, words) in top_topic_words.iteritems())

        # NOTE(review): doc_metadata is filled below but not read again in
        # this method.
        doc_metadata = {}

        # Turn the per-document topic counts into proportions.
        for doc in self.doc_topics.keys():
            total = float(sum(self.doc_topics[doc].values()))
            for k in self.doc_topics[doc].keys():
                self.doc_topics[doc][k] /= total

        # Attach the topic proportions to each document's metadata and note
        # its largest topic. Per-document failures are logged and skipped.
        for (id, topics) in self.doc_topics.iteritems():
            try:
                filename = self.docs[int(id)]

                itemid = self.metadata[filename]['itemID']

                doc_metadata[itemid] = \
                    {'label': self.metadata[filename]['label'],
                     'title': self.metadata[filename]['title']}

                freqs = topics
                main_topic = None
                topic_max = 0.0
                for i in freqs.keys():
                    if freqs[i] > topic_max:
                        main_topic = i
                        topic_max = freqs[i]
                doc_metadata[itemid]['main_topic'] = main_topic
                self.metadata[filename]["topics"] = freqs
            except KeyboardInterrupt:
                sys.exit(1)
            except:
                logging.error(traceback.format_exc())

        self.template_filename = os.path.join(self.cwd, 'templates',
                                              self.template_name + '.html')

        # Replace set values in the index by lists and make the index a
        # plain dict before it goes into params.
        if getattr(self, "index", None) is not None:
            for term in self.index:
                if isinstance(self.index[term], set):
                    self.index[term] = list(self.index[term])
            self.index = dict(self.index)

        params = {
            "CATEGORICAL": self.categorical,
            "TOPIC_LABELS": labels,
            "TOPIC_COHERENCE": {},
            "TAGS": getattr(self, "tags", {}),
            "INDEX": getattr(self, "index", {})
        }

        self.write_html(params)
 def disable_output(self):
     """
     Mute Java System.out, keeping the previous stream in
     ``self._java_stdout`` so it can be restored later.

     Calling this again while output is already muted does nothing;
     otherwise the saved stream would be replaced by the null stream and
     the real stdout could never be restored.
     """
     current = JavaSystem.out
     silencer = getattr(self, '_silent_stdout', None)
     if silencer is not None and current == silencer:
         # Already muted by an earlier call: keep the saved stream.
         return
     self._java_stdout = current
     self._silent_stdout = PrintStream(NoOutputStream())
     JavaSystem.setOut(self._silent_stdout)
Beispiel #22
0
    def process(self):
        """
        run DMR, creating an output file divided by time

        Steps, in order:

        1. Read the options from ``self.named_args`` (or fall back to the
           defaults) and build the MALLET instances.
        2. Unless ``self.dry_run`` is set, train a ``DMRTopicModel`` with
           Java stdout redirected into the progress file, then write the
           parameter and state files.
        3. Parse the parameter, progress and state files into
           ``self.topic_features``, ``self.alphas``, ``self.topic_words``
           and ``self.doc_topics``.
        4. Derive topic labels and allocation ratios and pass them to
           ``self.write_html``.
        """

        if self.named_args is not None:
            self.tfidf = self.named_args['tfidf']
            self.min_df = int(self.named_args['min_df'])
            self.stemming = self.named_args['stemming']
            self.topics = int(self.named_args['topics'])
            self.lang = self.named_args['lang']
        else:
            self.tfidf = True
            self.min_df = 5
            self.topics = 50
            self.stemming = True
            self.lang = 'en'

        self._setup_mallet_instances(tfidf=self.tfidf,
                stemming=self.stemming)

        # NOTE: this changes the process-wide working directory and does not
        # change it back in this method.
        os.chdir(self.mallet_out_dir)

        # from cc.mallet.topics.DMRTopicModel import main as DMRTopicModel
        from cc.mallet.topics import DMRTopicModel
        # NOTE(review): process_args is not used below (leftover from the
        # commented-out main() call).
        process_args = [self.instance_file, str(self.topics)]
        logging.info('begin DMR')

        start_time = time.time()
        self.parameter_file = os.path.join(self.mallet_out_dir,
                         'dmr.parameters')
        self.state_file = os.path.join(self.mallet_out_dir,
                         'dmr.state.gz')
        if not self.dry_run:
            # DMRTopicModel(process_args)
            from java.io import File, PrintStream, FileOutputStream
            from java.lang import System

            # Close the Python handle, then send Java stdout to the same
            # file; it is read back further down to collect the alphas.
            # NOTE(review): System.out is not restored in this method.
            self.progress_file.close()
            progress_file = File(self.progress_filename)
            System.setOut(PrintStream(FileOutputStream(progress_file)))

            from cc.mallet.types import InstanceList
            training = InstanceList.load(File(self.instance_file))
            numTopics = int(self.topics)
            lda = DMRTopicModel(numTopics)
            lda.setOptimizeInterval(100)
            lda.setTopicDisplay(100, 10)
            lda.addInstances(training)
            lda.estimate()
            lda.writeParameters(File(self.parameter_file))
            lda.printState(File(self.state_file))

        logging.info('DMR complete in ' + str(time.time() - start_time)
                     + ' seconds')

        # Parameter file: a 'FEATURES FOR CLASS topicN' line switches the
        # current topic; on every other line the second space-separated field
        # is taken as the feature name and the third as its value.
        self.topic_features = {}
        with codecs.open(self.parameter_file, 'r', encoding='utf-8') as f:
            topic = 0
            for line in f:
                new_topic = re.match('FEATURES FOR CLASS topic([0-9]+)'
                        , line)
                if new_topic is not None:
                    topic = int(new_topic.group(1))
                else:
                    if not topic in self.topic_features:
                        self.topic_features[topic] = {}
                    this_line = line.split(' ')
                    feature = this_line[1]
                    self.topic_features[topic][feature] = \
                        float(this_line[2])

        # Progress file: lines of the form '<int>\t<number>...' give the
        # alpha for a topic; the third tab-separated field is parsed but not
        # used.
        # NOTE(review): file() is the Python 2 builtin and the handle is not
        # closed in this method.
        self.progress_file = file(self.progress_filename, 'r')
        self.progress_file.seek(0, os.SEEK_SET)
        self.alphas = {}
        for line in self.progress_file:
            if re.match('[0-9]+\t[0-9.]+', line) is not None:
                this_line = line.split('\t')
                topic = int(this_line[0])
                alpha = float(this_line[1])
                tokens = int(this_line[2])

                self.alphas[topic] = alpha

        self.alpha_sum = sum(self.alphas.values())

        self.topic_words = {}
        self.doc_topics = {}

        # State file (gzip): the first line is skipped, then each line is one
        # token with space-separated fields; field 0 is read as the document,
        # 2 as the position, 4 as the word and 5 as the topic.
        with gzip.open(self.state_file, 'rb') as state_file:
            state_file.next()
            for line in state_file:
                this_line = line.split(' ')
                topic = int(this_line[5])
                word = this_line[4]
                doc = int(this_line[0])
                position = int(this_line[2])

                # count tokens per (document, topic)
                if not doc in self.doc_topics:
                    self.doc_topics[doc] = {}
                if not topic in self.doc_topics[doc]:
                    self.doc_topics[doc][topic] = 0
                self.doc_topics[doc][topic] += 1

                # count tokens per (topic, word)
                if not topic in self.topic_words:
                    self.topic_words[topic] = {}
                if not word in self.topic_words[topic]:
                    self.topic_words[topic][word] = 0
                self.topic_words[topic][word] += 1

        # total_tokens = float(sum([sum(y.values()) for x, y in self.topic_words.iteritems()]))

        # Turn the per-topic word counts into proportions within the topic.
        for topic in self.topic_words.keys():
            total = float(sum(self.topic_words[topic].values()))
            for k in self.topic_words[topic].keys():
                self.topic_words[topic][k] /= total

        # Keep the first top_N words that self.argsort(..., reverse=True)
        # returns for each topic.
        top_N = 20
        top_topic_words = dict((x, dict((word, y[word]) for word in
                               self.argsort(y, reverse=True)[:top_N]))
                               for (x, y) in
                               self.topic_words.iteritems())
        # NOTE(review): wordProbs is a list in dict-iteration order but is
        # indexed by topic id further down - confirm the two always line up.
        wordProbs = [[{'text': word, 'prob': prob} for (word, prob) in
                     y.iteritems()] for (x, y) in
                     top_topic_words.iteritems()]

        DEFAULT_DOC_PROPORTIONS = [
            0.01,
            0.02,
            0.05,
            0.1,
            0.2,
            0.3,
            0.5,
            ]
        # For every topic, count the documents whose smoothed share of that
        # topic reaches each threshold. The thresholds are ascending, so the
        # inner loop stops at the first one the share falls below.
        numDocumentsAtProportions = dict((topic, dict((k, 0.0) for k in
                DEFAULT_DOC_PROPORTIONS)) for topic in
                self.topic_words.keys())
        for (doc, topics) in self.doc_topics.iteritems():
            doc_length = sum(topics.values())
            for (topic, count) in topics.iteritems():
                proportion = (self.alphas[topic] + count) \
                    / (self.alpha_sum + doc_length)
                for min_proportion in DEFAULT_DOC_PROPORTIONS:
                    if proportion < min_proportion:
                        break
                    numDocumentsAtProportions[topic][min_proportion] += \
                        1

        # Allocation ratio: documents at >= 0.5 divided by documents at
        # >= 0.02; topics with no document at 0.02 are left out.
        allocationRatios = dict((topic, proportions[0.5]
                                / proportions[0.02]) for (topic,
                                proportions) in
                                numDocumentsAtProportions.iteritems()
                                if proportions[0.02] > 0.0)

        # The label of a topic is made of its first three words from argsort.
        labels = dict((topic, {'label': self.argsort(words,
                      reverse=True)[:3], 'fulltopic': wordProbs[topic],
                      'allocation_ratio': allocationRatios.get(topic,0)})
                      for (topic, words) in top_topic_words.iteritems())

        # NOTE(review): doc_metadata is filled below but not read again in
        # this method.
        doc_metadata = {}

        # Turn the per-document topic counts into proportions.
        for doc in self.doc_topics.keys():
            total = float(sum(self.doc_topics[doc].values()))
            for k in self.doc_topics[doc].keys():
                self.doc_topics[doc][k] /= total

        # Attach the topic proportions to each document's metadata and note
        # its largest topic. Per-document failures are logged and skipped.
        for (id, topics) in self.doc_topics.iteritems():
            try:
                filename = self.docs[int(id)]

                itemid = self.metadata[filename]['itemID']

                doc_metadata[itemid] = \
                    {'label': self.metadata[filename]['label'],
                     'title': self.metadata[filename]['title']}

                freqs = topics
                main_topic = None
                topic_max = 0.0
                for i in freqs.keys():
                    if freqs[i] > topic_max:
                        main_topic = i
                        topic_max = freqs[i]
                doc_metadata[itemid]['main_topic'] = main_topic
                self.metadata[filename]["topics"] = freqs
            except KeyboardInterrupt:
                sys.exit(1)
            except:
                logging.error(traceback.format_exc())

        self.template_filename = os.path.join(self.cwd, 'templates',
                self.template_name + '.html')

        # Replace set values in the index by lists and make the index a
        # plain dict before it goes into params.
        if getattr(self, "index", None) is not None:
            for term in self.index:
                if isinstance(self.index[term], set):
                    self.index[term] = list(self.index[term])
            self.index = dict(self.index)

        params = {"CATEGORICAL": self.categorical,
                        "TOPIC_LABELS": labels,
                        "TOPIC_COHERENCE": {},
                        "TAGS": getattr(self, "tags", {}),
                        "INDEX": getattr(self, "index", {})
        }

        self.write_html(params)
Beispiel #23
0
#print 'Argument List:', str(sys.argv)
#count = 0 
#for arg in sys.argv:
#    print "ARG[%s]="%count + arg + " : CLASS=%s"%arg.__class__
#    count += 1

#Assuming args as: {file.py, data.csv, model.bn5, ['outs'], ['thrus']}
#TODO: add more robust input checks
# The statements that follow index sys.argv[1] through sys.argv[4]
# unconditionally, so with fewer than five entries the script cannot proceed.
# Report the problem and stop with a non-zero status instead of continuing
# into an IndexError.
if len(sys.argv) < 5:
    print('Not enough args')
    sys.exit(1)

class NoOutputStream(OutputStream):
    """OutputStream subclass whose ``write`` discards whatever it is given."""

    def write(self, b, off=None, len=None):
        # java.io.OutputStream declares one-argument write overloads as well
        # as write(b, off, len). The defaults let this single method accept
        # all of them; every form is a no-op.
        pass

# Command-line arguments: data file, model file, then two Python literals
# (parsed safely with ast.literal_eval) for the "outs" and "thrus" values.
data = sys.argv[1]
model = sys.argv[2]
outs = ast.literal_eval(sys.argv[3])
thrus = ast.literal_eval(sys.argv[4])

# Keep a handle on the original Java stdout, then replace it with a
# discarding stream before the inference classes are imported and run.
oldOut = System.out
System.setOut(PrintStream(NoOutputStream()))
import datadigest.inference.BatchInferenceFacade as BatchInferenceFacade
bi = BatchInferenceFacade(data, model, outs, thrus)
results = bi.runBatchInference()

# Read everything the result stream reports as available into a byte array.
length = results.available()
buff = zeros(length, 'b')
results.read(buff)

# The array holds signed bytes, so mask each value into 0..255 before chr();
# chr() rejects negative numbers. join() avoids repeated string concatenation.
theChars = ''.join([chr(b & 0xFF) for b in buff])
print(theChars)
Beispiel #24
0
import os
import imp
import jpype
import jpype.imports
from jpype.types import *

if not jpype.isJVMStarted():
    # Collect every .jar found under the pdfextract package's data directory.
    jars = []
    for top, dirs, files in os.walk(
            imp.find_module('pdfextract')[1] + '/data'):
        for nm in files:
            if not nm.endswith(".jar"):
                continue
            jars.append(os.path.join(top, nm))

    # Register the jars on the classpath and start the JVM.
    jpype.addClassPath(os.pathsep.join(jars))
    jpype.startJVM(jpype.getDefaultJVMPath(), convertStrings=False)

    # These java.* imports are placed after startJVM, then Java's stdout is
    # pointed at the platform's null device.
    from java.lang import System
    from java.io import PrintStream, File
    System.setOut(PrintStream(File(os.devnull)))
Beispiel #25
0
# Load settings from asbmansnmp.cfg, located in the same directory as this script.
config = ConfigParser.ConfigParser()
config.read( "{0}/asbmansnmp.cfg".format(os.path.dirname(os.path.abspath(__file__))))

# Set up logging: INFO level, written to the file named by the "logfile"
# option of the "settings" section, with a timestamp prefixed to each record.
logging.basicConfig( filename = config.get("settings","logfile"), level=logging.INFO,format='%(asctime)s:' + logging.BASIC_FORMAT)

# NOTE(review): sections() lists every section of the file, which includes
# "settings" itself -- confirm the code consuming `servers` accounts for that.
servers = config.sections()
timeout = config.getint("settings","timeout")
debug  = config.getboolean("settings","debug")

# Discard all Java output unless debugging; it would mess up pass_persist communication.
if not debug:
    # A single discarding stream is installed for both stderr and stdout.
    nulloutput = PrintStream(NullOutputStream())
    javasystem.setErr(nulloutput)
    javasystem.setOut(nulloutput)

# List of information we want to write to files / snmp.
# Each entry is [name, detail], where detail is either 1 or a pair of strings
# (presumably sub-value names such as "Max"/"Avg" -- verify against the consumer).
properties = [ ['ActiveServers', 1], 
               ['RqDuration', ["Max","Avg"]], 
               ['RqWait', ["Max","Avg"]],
               ['ActiveClients', ['Now','Peak']],
               ['TotalRequests', 1],
               ['BusyServers', 1],
               ['AvailableServers', 1],
               ['ClientQueueDepth', ['Cur','Max']],
               ['LockedServers', 1],
               ['Status', 1],
               ['MaxAgents', 1],
               ['MaxClients', 1]               
             ]
    from org.robotframework.abbot.tester import Robot
except ImportError:
    sys.stderr.write("ERROR: Could not find RobotFramework SwingLibrary jar file. \n" +
                     "Please download it and add it to your CLASSPATH as described at :\n" +
                     "http://www.texttest.org/index.php?page=ui_testing&n=storytext_and_swing\n")
    sys.exit(1)
    
from org.robotframework.org.netbeans import jemmy

# Importing writes uninteresting stuff to stdout
# Remember the real Java stdout so it can be restored after the noisy import
# (other code in this module also restores to out_orig).
out_orig = System.out


class NullOutputStream(OutputStream):
    """OutputStream whose write accepts any arguments and discards them."""

    def write(self, *args):
        pass


System.setOut(PrintStream(NullOutputStream()))
try:
    import SwingLibrary
    swinglib = SwingLibrary()
finally:
    # Restore stdout even when the import or construction fails, so a failure
    # here does not leave all later Java output discarded.
    System.setOut(out_orig)

# Uncomment for Abbot logs
#import abbot
#abbot.Log.init([ "--debug", "all" ])

def runKeyword(keywordName, *args):
    """Run a SwingLibrary keyword through the module-level ``swinglib`` object.

    The positional ``args`` are passed on as a list and the keyword's result
    is returned unchanged.
    """
    # To debug SwingLibrary issues, uncomment the two lines below to log each call:
    #f = open("swinglib.storytext", "a")
    #f.write("runKeyword" + repr((keywordName, list(args))) + "\n")
    keywordArgs = list(args)
    return swinglib.runKeyword(keywordName, keywordArgs)

def selectWindow(widget):
Beispiel #27
0
 def _generate(self, index):
     """Click for a popup at the centre of the bounds returned for ``index``.

     Java's System.out is replaced by a discarding stream while the bounds
     are fetched and the ComponentOperator is created, and is restored
     before the click is issued.
     """
     System.setOut(PrintStream(NullOutputStream()))
     try:
         rect = self.widget.getBoundsAt(index)
         operator = ComponentOperator(self.widget.widget)
     finally:
         # Restore stdout even if the lookup or operator creation raises;
         # otherwise all later Java output would stay discarded.
         System.setOut(out_orig)
     operator.clickForPopup(rect.x + rect.width / 2, rect.y + rect.height / 2)
Beispiel #28
0
    os.path.dirname(os.path.abspath(__file__))))

# set up logging
# Log at INFO level to the file named by the "logfile" option of the
# "settings" section, with a timestamp prefixed to each record.
logging.basicConfig(filename=config.get("settings", "logfile"),
                    level=logging.INFO,
                    format='%(asctime)s:' + logging.BASIC_FORMAT)

# NOTE(review): sections() lists every section of the file, which includes
# "settings" itself -- confirm the code consuming `servers` accounts for that.
servers = config.sections()
timeout = config.getint("settings", "timeout")
debug = config.getboolean("settings", "debug")

# Discard all Java output unless debugging; it would mess up pass_persist communication.
if not debug:
    # A single discarding stream is installed for both stderr and stdout.
    nulloutput = PrintStream(NullOutputStream())
    javasystem.setErr(nulloutput)
    javasystem.setOut(nulloutput)

# List of information we want to write to files / snmp.
# Each entry is [name, detail], where detail is either 1 or a pair of strings
# (presumably sub-value names such as "Max"/"Avg" -- verify against the consumer).
properties = [['ActiveServers', 1], ['RqDuration', ["Max", "Avg"]],
              ['RqWait', ["Max", "Avg"]], ['ActiveClients', ['Now', 'Peak']],
              ['TotalRequests', 1], ['BusyServers', 1],
              ['AvailableServers', 1], ['ClientQueueDepth', ['Cur', 'Max']],
              ['LockedServers', 1], ['Status', 1], ['MaxAgents', 1],
              ['MaxClients', 1]]
# Maps a status name to an integer code.
statusses = {"ACTIVE": 3, "STARTING": 2, "STOPPING": 1, "STOPPED": 0}


class Unbuffered:
    """Wrapper that keeps a reference to a stream object.

    NOTE(review): only the constructor is visible here; the class name suggests
    further methods exist elsewhere -- confirm against the full source.
    """

    def __init__(self, stream):
        """Store ``stream`` on the instance as ``self.stream``."""
        self.stream = stream
 def __del__(self):
     """Reinstall the saved Java stdout/stderr streams and say goodbye.

     The streams come from this object's private ``__outOriginal`` and
     ``__errOriginal`` attributes.
     """
     # Put the saved stdout back first, then stderr.
     System.setOut(self.__outOriginal)
     System.setErr(self.__errOriginal)
     print('Goodbye')
Beispiel #30
0
 def _generate(self, argument):
     """Call the popup on the table cell given by ``argument``.

     ``argument`` is unpacked as a ``(row, column)`` pair. Java's System.out
     is replaced by a discarding stream while the JTableOperator is created,
     and is restored before the popup call is made.
     """
     row, column = argument
     System.setOut(PrintStream(NullOutputStream()))
     try:
         operator = JTableOperator(self.widget.widget)
     finally:
         # Restore stdout even if operator creation raises; otherwise all
         # later Java output would stay discarded.
         System.setOut(out_orig)
     operator.callPopupOnCell(row, column)
Beispiel #31
0
                                        model_context,
                                        aliases,
                                        wlst_mode=__wlst_mode)
    except DeployException, de:
        __release_edit_session_and_disconnect()
        raise de

    exit_code = 0

    try:
        # First re-enable stdout and redirect it to a string output stream, then
        # call isRestartRequired so its output is captured as a string, and
        # finally silence the WLST output again.
        __wlst_helper.enable_stdout()
        sostream = StringOutputStream()
        System.setOut(PrintStream(sostream))
        restart_required = __wlst_helper.is_restart_required()
        is_restartreq_output = sostream.get_string()
        __wlst_helper.silence()
        if model_context.is_rollback_if_restart_required(
        ) and restart_required:
            __wlst_helper.cancel_edit()
            __logger.severe('WLSDPLY_09015', is_restartreq_output)
            exit_code = CommandLineArgUtil.PROG_ROLLBACK_IF_RESTART_EXIT_CODE
        else:
            __wlst_helper.save()
            __wlst_helper.activate()
            if restart_required:
                exit_code = CommandLineArgUtil.PROG_RESTART_REQUIRED
    except BundleAwareException, ex:
        __release_edit_session_and_disconnect()
Beispiel #32
0
#    print "ARG[%s]="%count + arg + " : CLASS=%s"%arg.__class__
#    count += 1

#Assuming args as: {file.py, data.csv, model.bn5, ['outs'], ['thrus']}
#TODO: add more robust input checks
# The statements that follow index sys.argv[1] through sys.argv[4]
# unconditionally, so with fewer than five entries the script cannot proceed.
# Report the problem and stop with a non-zero status instead of continuing
# into an IndexError.
if len(sys.argv) < 5:
    print('Not enough args')
    sys.exit(1)


class NoOutputStream(OutputStream):
    """OutputStream subclass whose ``write`` discards whatever it is given."""

    def write(self, b, off=None, len=None):
        # java.io.OutputStream declares one-argument write overloads as well
        # as write(b, off, len). The defaults let this single method accept
        # all of them; every form is a no-op.
        pass


# Command-line arguments: data file, model file, then two Python literals
# (parsed safely with ast.literal_eval) for the "outs" and "thrus" values.
data = sys.argv[1]
model = sys.argv[2]
outs = ast.literal_eval(sys.argv[3])
thrus = ast.literal_eval(sys.argv[4])

# Keep a handle on the original Java stdout, then replace it with a
# discarding stream before the inference classes are imported and run.
oldOut = System.out
System.setOut(PrintStream(NoOutputStream()))
import datadigest.inference.BatchInferenceFacade as BatchInferenceFacade
bi = BatchInferenceFacade(data, model, outs, thrus)
results = bi.runBatchInference()

# Read everything the result stream reports as available into a byte array.
length = results.available()
buff = zeros(length, 'b')
results.read(buff)

# The array holds signed bytes, so mask each value into 0..255 before chr();
# chr() rejects negative numbers. join() avoids repeated string concatenation.
theChars = ''.join([chr(b & 0xFF) for b in buff])
print(theChars)