def wrapper(*args, **kwargs):
    """Call the wrapped JPype-using function with this thread attached to the JVM.

    Bug fixes vs. original:
    - the redundant second isThreadAttachedToJVM() check could leave
      return_value unbound (UnboundLocalError on return);
    - detachThreadFromJVM() was skipped if the wrapped call raised.
    """
    # Sanity check before we start using JPype
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    try:
        return function(*args, **kwargs)
    finally:
        # Always detach from the JVM, even when the wrapped call raises.
        jpype.detachThreadFromJVM()
def weka_local_arff_to_weka_instances(input_dict):
    """Reads a dataset into a format suitable for WEKA methods.

    :param input_dict: dict with 'arff' (ARFF text) and optional 'class_index'
    :return: dict with 'instances' -> base64-serialized weka.core.Instances

    Fixes vs. original: the bare ``except:`` is narrowed to the exceptions
    int()/indexing can actually raise, and the print statement is written in
    the parenthesized form valid in both Python 2 and 3.
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    tmp = common.TemporaryFile(suffix='.arff')
    tmp.writeString(input_dict['arff'])
    try:
        class_index = int(input_dict['class_index'])
    except (KeyError, TypeError, ValueError):
        # Missing or non-numeric class index -> fall back to last attribute.
        class_index = None
    source = jp.JClass('weka.core.converters.ConverterUtils$DataSource')(tmp.name)
    instances = source.getDataSet()
    if class_index is None:
        print('Warning: class is set to the last attribute!')
        class_index = instances.numAttributes() - 1
    elif class_index == -1:
        # -1 is the conventional "use last attribute" sentinel.
        class_index = instances.numAttributes() - 1
    instances.setClassIndex(class_index)
    return {'instances': common.serialize_weka_object(instances)}
def joystickTimerEvent( self ) :
    # Periodic joystick poll: maps stick axes/buttons onto the movement canvas
    # and reschedules itself while the joystick remains active.
    if self.__directConnection :
        # Direct (in-process JVM) connection: make sure this callback thread
        # is attached before any JPype call.
        if not jpype.isThreadAttachedToJVM() :
            jpype.attachThreadToJVM()
    if self.joystickActive :
        self.joystick.update()
        if not self.joystick.getButton(0) :
            # Dead-man button (0) not held: emit a single stop command on the
            # transition from held -> released, otherwise do nothing.
            if self.joystickPrevButtonDown :
                # if button was just recently let up
                self.joystickPrevButtonDown = False
                self.updateMovementCanvas( 0.0 , 0.0 )
        else :
            self.joystickPrevButtonDown = True
            # Buttons 1/3/2 select full, half and quarter forward speed;
            # default is one sixth. X axis passes through; Y axis is inverted.
            if self.joystick.getButton(1) :
                self.updateMovementCanvas( self.joystick.getAxis(0) , -self.joystick.getAxis(1) )
            elif self.joystick.getButton(3) :
                self.updateMovementCanvas( self.joystick.getAxis(0) , -self.joystick.getAxis(1) / 2.0 )
            elif self.joystick.getButton(2) :
                self.updateMovementCanvas( self.joystick.getAxis(0) , -self.joystick.getAxis(1) / 4.0 )
            else :
                self.updateMovementCanvas( self.joystick.getAxis(0) , -self.joystick.getAxis(1) / 6.0 )
        # Re-arm the one-shot timer for the next poll.
        # NOTE(review): threading.Timer.start() returns None, which would make
        # self.joystickTimer always falsy below — confirm this Timer's start()
        # returns the timer instance.
        self.joystickTimer = Timer( self.joystickTimerPeriod , self.joystickTimerEvent ).start()
    else :
        #joystick is not active
        if self.joystickTimer :
            self.joystickTimer.stop()
            self.joystickTimer = None
        self.joystickPrevButtonDown = False
        self.updateMovementCanvas( 0.0 , 0.0 )
def weka_local_generic_learner(input_dict):
    """Instantiate an arbitrary WEKA learner class with user-supplied options.

    :param input_dict: dict with 'weka_class' (fully qualified Java class
        name) and 'params' (option string)
    :return: dict with 'Generic_Weka_learner' -> serialized learner
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    learner = jp.JClass(input_dict['weka_class'])()
    learner.setOptions(common.parse_options(input_dict['params']))
    return {'Generic_Weka_learner': common.serialize_weka_object(learner)}
def _jdbc_connect_jpype(jclassname, url, driver_args, jars, libs):
    """Open a JDBC connection through JPype, starting the JVM on first use.

    :param jclassname: JDBC driver class name (registered via JClass import)
    :param url: JDBC connection URL
    :param driver_args: sequence of positional args, or a dict converted to
        java.util.Properties
    :param jars: extra jar paths appended to the JVM classpath
    :param libs: shared-library paths for -Djava.library.path
    :return: a java.sql.Connection

    Bug fix: the old-jpype detection parsed the version with
    ``float(re.match('\\d+\\.\\d+', ...))`` — "0.10" parses as 0.1 < 0.7,
    misclassifying jpype >= 0.10 as old. Compare (major, minor) int tuples.
    """
    import jpype
    if not jpype.isJVMStarted():
        args = []
        class_path = []
        if jars:
            class_path.extend(jars)
        class_path.extend(_get_classpath())
        if class_path:
            args.append('-Djava.class.path=%s' %
                        os.path.pathsep.join(class_path))
        if libs:
            # path to shared libraries
            libs_path = os.path.pathsep.join(libs)
            args.append('-Djava.library.path=%s' % libs_path)
        jvm_path = jpype.getDefaultJVMPath()
        global old_jpype
        if hasattr(jpype, '__version__'):
            ver_match = re.match(r'(\d+)\.(\d+)', jpype.__version__)
            if ver_match:
                major, minor = (int(g) for g in ver_match.groups())
                if (major, minor) < (0, 7):
                    old_jpype = True
        if old_jpype:
            jpype.startJVM(jvm_path, *args)
        else:
            jpype.startJVM(jvm_path, *args, ignoreUnrecognized=True,
                           convertStrings=True)
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    if _jdbc_name_to_const is None:
        types = jpype.java.sql.Types
        types_map = {}
        for i in types.__javaclass__.getClassFields():
            # jpype < 0.7 exposes static fields differently from 0.7+.
            if old_jpype:
                const = i.getStaticAttribute()
            else:
                const = i.__get__(i)
            types_map[i.getName()] = const
        _init_types(types_map)
    global _java_array_byte
    if _java_array_byte is None:
        def _java_array_byte(data):
            return jpype.JArray(jpype.JByte, 1)(data)
    # Importing the driver class registers it with DriverManager.
    jpype.JClass(jclassname)
    if isinstance(driver_args, dict):
        Properties = jpype.java.util.Properties
        info = Properties()
        for k, v in driver_args.items():
            info.setProperty(k, v)
        dargs = [info]
    else:
        dargs = driver_args
    return jpype.java.sql.DriverManager.getConnection(url, *dargs)
def weka_local_apply_classifier(input_dict):
    """Apply a serialized WEKA classifier to a serialized Instances set.

    :param input_dict: dict with 'instances' and 'classifier' (both
        base64-serialized WEKA objects)
    :return: dict with 'classes' -> list of predicted label strings
    :raises Exception: if the classifier cannot be deserialized

    Fixes vs. original: the try block covered the whole prediction loop, so
    any classification error was misreported as "classifier not built"; and
    labels were looked up on the *last* attribute instead of the class
    attribute (wrong whenever the class index is not the last attribute).
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    instances = common.deserialize_weka_object(input_dict['instances'])
    if instances.classIndex() == -1:
        # No class set: default to the last attribute.
        instances.setClassIndex(instances.numAttributes() - 1)
    try:
        classifier = common.deserialize_weka_object(input_dict['classifier'])
    except Exception:
        raise Exception(
            "Classifier not built. Please use the Build Classifier widget first." )
    class_attribute = instances.classAttribute()
    predictions = []
    for instance in instances:
        label_ind = int(classifier.classifyInstance(instance))
        predictions.append(class_attribute.value(label_ind))
    return {'classes': predictions}
def weka_local_libsvm(input_dict):
    """Build an (untrained) WEKA LibSVM learner configured from 'params'.

    :param input_dict: dict with 'params' (LibSVM option string)
    :return: dict with 'LibSVM_learner' -> serialized learner
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    svm = jp.JClass('weka.classifiers.functions.LibSVM')()
    svm.setOptions(common.parse_options(input_dict['params']))
    return {'LibSVM_learner': common.serialize_weka_object(svm)}
def weka_local_arff_to_weka_instances(input_dict):
    """Reads a dataset into a format suitable for WEKA methods.

    :param input_dict: dict with 'arff' (ARFF text) and optional 'class_index'
    :return: dict with 'instances' -> base64-serialized weka.core.Instances

    Fixes vs. original: narrowed the bare ``except:`` and used the
    parenthesized print form valid in both Python 2 and 3.
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    tmp = common.TemporaryFile(suffix='.arff')
    tmp.writeString(input_dict['arff'])
    try:
        class_index = int(input_dict['class_index'])
    except (KeyError, TypeError, ValueError):
        # Missing or non-numeric class index -> fall back to last attribute.
        class_index = None
    source = jp.JClass('weka.core.converters.ConverterUtils$DataSource')(
        tmp.name)
    instances = source.getDataSet()
    if class_index is None:
        print('Warning: class is set to the last attribute!')
        class_index = instances.numAttributes() - 1
    elif class_index == -1:
        # -1 is the conventional "use last attribute" sentinel.
        class_index = instances.numAttributes() - 1
    instances.setClassIndex(class_index)
    return {'instances': common.serialize_weka_object(instances)}
def __init__(self, arff_string, weights, nb_seeds, nb_steps):
    """Configure this strategy with its data, weights and run parameters.

    :param arff_string: dataset in ARFF textual form (passed to Monostrategy)
    :param weights: attribute weights (passed to Monostrategy)
    :param nb_seeds: number of seeds, coerced to int
    :param nb_steps: number of steps, coerced to int
    """
    # JPype calls may happen in the base initializer; attach first.
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    Monostrategy.__init__(self, arff_string, weights)
    # Coerce to int so string-valued widget inputs are accepted.
    self.nb_seeds = int(nb_seeds)
    self.nb_steps = int(nb_steps)
def __init__(self, *args, **kwargs):
    """Create a boilerpipe extractor wrapper.

    :param args: optionally a single extractor name (default ArticleExtractor)
    :param kwargs: 'url' or 'html' to pre-load content
    :raises Exception: if more than one positional argument is given

    Bug fix: ``lock.acquire()`` sat inside the try block *after* code that can
    raise, so the ``finally: lock.release()`` could release a lock this thread
    never acquired (masking the original error). ``with lock`` acquires first.
    """
    if len(args) == 0:
        extractor = 'ArticleExtractor'
    elif len(args) == 1:
        extractor = args[0]
    else:
        raise Exception('Invalid extractor param')
    self.extractor_name = extractor
    if kwargs.get('url'):
        self.setUrl(kwargs['url'])
    elif kwargs.get('html'):
        self.setHtml(kwargs['html'])
    # make it thread-safe
    if threading.activeCount() > 1:
        if not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()
    with lock:
        self.extractor = jpype.JClass(
            "de.l3s.boilerpipe.extractors." + self.extractor_name).INSTANCE
def __init__(self, extractor='DefaultExtractor', **kwargs):
    """Fetch or accept HTML, then run the given boilerpipe extractor on it.

    :param extractor: boilerpipe extractor class name
    :param kwargs: 'url' to fetch, or 'html' with raw markup
    :raises Exception: if neither 'url' nor 'html' is provided

    Bug fix: ``lock.acquire()`` was inside the try block after raising code,
    so ``finally: lock.release()`` could release an unheld lock; ``with lock``
    acquires before entering the protected region.
    """
    if kwargs.get('url'):
        request = urllib2.urlopen(kwargs['url'])
        self.data = request.read()
        encoding = request.headers['content-type'].lower().split('charset=')[-1]
        if encoding.lower() == 'text/html':
            # No charset in the Content-Type header: sniff it from the bytes.
            encoding = chardet.detect(self.data)['encoding']
        self.data = unicode(self.data, encoding)
    elif kwargs.get('html'):
        self.data = kwargs['html']
        if not isinstance(self.data, unicode):
            self.data = unicode(self.data, chardet.detect(self.data)['encoding'])
    else:
        raise Exception('No text or url provided')
    # make it thread-safe
    if threading.activeCount() > 1:
        if not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()
    with lock:
        self.extractor = jpype.JClass(
            "de.l3s.boilerpipe.extractors."+extractor).INSTANCE
    reader = StringReader(self.data)
    self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
    self.extractor.process(self.source)
def _jdbc_connect_jpype(jclassname, jars, libs, *driver_args):
    """Open a JDBC connection through JPype, starting the JVM on first use.

    :param jclassname: JDBC driver class name (registered via JClass import)
    :param jars: extra jar paths appended to the JVM classpath
    :param libs: shared-library paths for -Djava.library.path
    :param driver_args: forwarded to DriverManager.getConnection
    :return: a java.sql.Connection
    """
    import jpype
    if not jpype.isJVMStarted():
        jvm_args = []
        classpath_entries = []
        if jars:
            classpath_entries.extend(jars)
        classpath_entries.extend(_get_classpath())
        if classpath_entries:
            jvm_args.append('-Djava.class.path=%s' %
                            os.path.pathsep.join(classpath_entries))
        if libs:
            # path to shared libraries
            jvm_args.append('-Djava.library.path=%s' %
                            os.path.pathsep.join(libs))
        # jvm_path = ('/usr/lib/jvm/java-6-openjdk'
        #             '/jre/lib/i386/client/libjvm.so')
        jpype.startJVM(jpype.getDefaultJVMPath(), *jvm_args)
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    # Build the SQL type-name -> constant map exactly once.
    if _jdbc_name_to_const is None:
        fields = jpype.java.sql.Types.__javaclass__.getClassFields()
        _init_types({f.getName(): f.getStaticAttribute() for f in fields})
    global _java_array_byte
    if _java_array_byte is None:
        def _java_array_byte(data):
            return jpype.JArray(jpype.JByte, 1)(data)
    # register driver for DriverManager
    jpype.JClass(jclassname)
    return jpype.java.sql.DriverManager.getConnection(*driver_args)
def __init__(self, jars=None, jvm_started=False, mark_time_ranges=False,
             include_range=False):
    """Initializes GetTimeForEvt.

    :param jars: extra jar paths for the classpath (default: none)
    :param jvm_started: skip JVM startup when True
    :param mark_time_ranges: stored for later use
    :param include_range: stored for later use

    Bug fixes: mutable default argument ``jars=[]`` (shared across calls);
    ``isThreadAttachedToJVM() is not 1`` was an identity comparison against
    an int; and the lock was acquired inside ``try`` after raising code, so
    ``finally`` could release an unheld lock.
    """
    self.mark_time_ranges = mark_time_ranges
    self.include_range = include_range
    self.jars = jars if jars is not None else []
    self._is_loaded = False
    self._lock = threading.Lock()
    if not jvm_started:
        self._classpath = self._create_classpath()
        self._start_jvm()
    # make it thread-safe
    if threading.activeCount() > 1:
        if not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()
    with self._lock:
        EvtInfoWrapper = jpype.JClass('STFnlp.getEvtInfo')
        self._getinfo = EvtInfoWrapper()
        self._is_loaded = True
def start_jvm():
    """Start the JVM if needed, attach this thread, and register the Apache
    POI jars on the classpath exactly once per process."""
    global classpath_added
    if not jpype.isJVMStarted():
        jpype.startJVM(jpype.get_default_jvm_path(),
                       '-Dfile.encoding=UTF8', '-ea', '-Xmx1024m')
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    if classpath_added:
        return
    # POI core + its bundled dependencies, shipped next to this module.
    poi_jars = [
        'poi-3.17.jar',
        'poi-excelant-3.17.jar',
        'poi-ooxml-3.17.jar',
        'poi-ooxml-schemas-3.17.jar',
        'poi-scratchpad-3.17.jar',
        'lib/commons-codec-1.10.jar',
        'lib/commons-collections4-4.1.jar',
        'lib/commons-logging-1.2.jar',
        'lib/log4j-1.2.17.jar',
        'ooxml-lib/xmlbeans-2.6.0.jar',
        'ooxml-lib/curvesapi-1.04.jar',
    ]
    add_classpaths([f'{os.path.dirname(__file__)}/java/{lib}'
                    for lib in poi_jars])
    classpath_added = True
def __init__(self, extractor='DefaultExtractor', **kwargs):
    """Fetch or accept HTML, then run the given boilerpipe extractor on it.

    :param extractor: boilerpipe extractor class name
    :param kwargs: 'url' to fetch via requests, or 'html' with raw markup
    :raises Exception: if neither 'url' nor 'html' is provided

    Bug fix: the lock was acquired inside ``try`` after code that can raise,
    so ``finally: lock.release()`` could release an unheld lock; ``with lock``
    acquires first. Also replaced ``== False`` with ``not``.
    """
    if kwargs.get('url'):
        response = requests.request('GET', kwargs['url'], headers=self.headers)
        self.data = response.text
    elif kwargs.get('html'):
        self.data = kwargs['html']
        if not isinstance(self.data, unicode):
            self.data = unicode(self.data, charade.detect(self.data)['encoding'])
    else:
        raise Exception('No text or url provided')
    # make it thread-safe
    if threading.activeCount() > 1:
        if not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()
    with lock:
        self.extractor = jpype.JClass(
            "de.l3s.boilerpipe.extractors."+extractor).INSTANCE
    reader = StringReader(self.data)
    self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
    self.extractor.process(self.source)
def open(cls, connection):
    """Open a DB2 JDBC connection for dbt, reusing an already-open handle.

    :param connection: dbt connection object with .state/.credentials/.handle
    :return: the same connection, opened (state='open') or failed (state='fail')
    :raises dbt.exceptions.FailedToConnectException: on any connect error

    Bug fix: the error log said "postgres" although this adapter connects to
    DB2.
    """
    if connection.state == 'open':
        logger.debug('Connection is already open, skipping open.')
        return connection
    credentials = cls.get_credentials(connection.credentials)
    try:
        # jaydebeapi/jpype: threads other than the JVM starter must attach.
        if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()
            jpype.java.lang.Thread.currentThread().setContextClassLoader(
                jpype.java.lang.ClassLoader.getSystemClassLoader())
        # TODO(review): the driver jar path is hard-coded to a developer
        # machine; it should come from configuration.
        C = jaydebeapi.connect(
            'com.ibm.db2.jcc.DB2Driver',
            'jdbc:db2://' + credentials.host + ':' + str(credentials.port) +
            '/' + credentials.database,
            [credentials.username, credentials.password],
            'C:/Users/ilija/Downloads/db2jcc-db2jcc4.jar')
        connection.handle = C
        connection.state = 'open'
    except Exception as e:
        logger.debug("Got an error when attempting to open a DB2 "
                     "connection: '{}'".format(e))
        connection.handle = None
        connection.state = 'fail'
        raise dbt.exceptions.FailedToConnectException(str(e))
    return connection
def __init__(self, extractor='DefaultExtractor', **kwargs):
    """Fetch or accept HTML, then run the given boilerpipe extractor on it.

    :param extractor: boilerpipe extractor class name
    :param kwargs: 'url' to fetch via urllib2, or 'html' with raw markup
    :raises Exception: if neither 'url' nor 'html' is provided

    Bug fix: the lock was acquired inside ``try`` after code that can raise,
    so ``finally: lock.release()`` could release an unheld lock; ``with lock``
    acquires first. Also replaced ``== False`` with ``not``.
    """
    if kwargs.get('url'):
        request = urllib2.urlopen(kwargs['url'])
        self.data = request.read()
        encoding = request.headers['content-type'].lower().split(
            'charset=')[-1]
        if encoding.lower() == 'text/html':
            # No charset in the Content-Type header: sniff it from the bytes.
            encoding = chardet.detect(self.data)['encoding']
        self.data = unicode(self.data, encoding)
    elif kwargs.get('html'):
        self.data = kwargs['html']
        if not isinstance(self.data, unicode):
            self.data = unicode(self.data, chardet.detect(self.data)['encoding'])
    else:
        raise Exception('No text or url provided')
    if threading.activeCount() > 1:
        if not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()
    with lock:
        self.extractor = jpype.JClass("de.l3s.boilerpipe.extractors." +
                                      extractor).INSTANCE
    reader = StringReader(self.data)
    self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
    self.extractor.process(self.source)
def _start_jvm(cls, jvm_path, jvm_options, driver_path, log4j_conf):
    # Start the JVM (once per process) with the Athena JDBC driver and a
    # log4j configuration on the classpath, then make the driver visible to
    # this thread's context classloader.
    if jvm_path is None:
        jvm_path = jpype.get_default_jvm_path()
    if driver_path is None:
        driver_path = os.path.join(cls._BASE_PATH, ATHENA_JAR)
    if log4j_conf is None:
        log4j_conf = os.path.join(cls._BASE_PATH, LOG4J_PROPERTIES)
    if not jpype.isJVMStarted():
        _logger.debug('JVM path: %s', jvm_path)
        args = [
            '-server',
            '-Djava.class.path={0}'.format(driver_path),
            '-Dlog4j.configuration=file:{0}'.format(log4j_conf)
        ]
        if jvm_options:
            args.extend(jvm_options)
        _logger.debug('JVM args: %s', args)
        # jpype 0.6 does not accept the keyword arguments added in 0.7.
        if jpype.__version__.startswith("0.6"):
            jpype.startJVM(jvm_path, *args)
        else:
            jpype.startJVM(jvm_path, *args, ignoreUnrecognized=True,
                           convertStrings=True)
        # Remember the loader created at startup for reuse by later threads.
        cls.class_loader = jpype.java.lang.Thread.currentThread().getContextClassLoader()
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    if not cls.class_loader:
        cls.class_loader = jpype.java.lang.Thread.currentThread().getContextClassLoader()
    # Wrap the driver jar in a URLClassLoader so DriverManager can see it
    # from this thread.
    # NOTE(review): reconstructed from a single-line source; confirm these
    # last statements run on every call rather than only on first attach.
    class_loader = jpype.java.net.URLClassLoader.newInstance(
        [jpype.java.net.URL('jar:file:{0}!/'.format(driver_path))],
        cls.class_loader)
    jpype.java.lang.Thread.currentThread().setContextClassLoader(class_loader)
def run(self):
    """Run the SAMARAH multi-strategy collaborative clustering and return the
    clustering result as ARFF text."""
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    HybridClassification = jp.JClass(
        'jcl.learning.methods.multistrategy.samarah.HybridClassification')
    self.hybrid_classification = HybridClassification()
    # Register every agent's configuration and data with the hybrid classifier.
    for agent in self.agents_python:
        self.hybrid_classification.addAgent(agent.get_parameters(),
                                            agent.get_data())
    self.hybrid_classification.classify()
    clustering_result = self.hybrid_classification.getClusteringResult()
    # Write the result to a uniquely named ARFF file in a fresh temp dir,
    # then read it back as text.
    out_dir = tempfile.mkdtemp()
    stamp = int(round(time.time() * 1000))
    out_path = os.path.join(out_dir, "result%s.arff" % str(stamp))
    ARFFWriter = jp.JClass('jcl.io.arff.ARFFWriter')
    ARFFWriter(out_path, clustering_result).write()
    with open(out_path, 'r') as f:
        return f.read()
def __init__(self, jars=None, jvm_started=False, mark_time_ranges=False,
             include_range=False):
    """Initializes SUTime.

    :param jars: extra jar paths for the classpath (default: none)
    :param jvm_started: skip JVM startup when True
    :param mark_time_ranges: forwarded to the Java SUTimeWrapper
    :param include_range: forwarded to the Java SUTimeWrapper

    Bug fixes: mutable default argument ``jars=[]``; ``is not 1`` identity
    comparison against an int; lock acquired inside ``try`` after raising
    code, so ``finally`` could release an unheld lock.
    """
    self.mark_time_ranges = mark_time_ranges
    self.include_range = include_range
    self.jars = jars if jars is not None else []
    self._is_loaded = False
    self._lock = threading.Lock()
    if not jvm_started:
        self._classpath = self._create_classpath()
        self._start_jvm()
    # make it thread-safe
    if threading.activeCount() > 1:
        if not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()
    with self._lock:
        SUTimeWrapper = jpype.JClass(
            'edu.stanford.nlp.python.SUTimeWrapper')
        self._sutime = SUTimeWrapper(self.mark_time_ranges, self.include_range)
        self._is_loaded = True
def getExternalConnection(self):
    # Opens a JDBC connection to the group's DB2 instance via jaydebeapi.
    # The port is derived from the numeric suffix of the username
    # (50<gruppennummer>).
    # NOTE(review): on failure this prints the error and implicitly returns
    # None — callers must handle a None connection.
    try:
        # Fix
        import jpype
        # jaydebeapi/jpype: threads other than the JVM starter must attach.
        if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()
            jpype.java.lang.Thread.currentThread().setContextClassLoader(
                jpype.java.lang.ClassLoader.getSystemClassLoader())
        conn = jaydebeapi.connect(
            "com.ibm.db2.jcc.DB2Driver",
            "jdbc:db2://"
            "{rechnername}.is.inf.uni-due.de:50{gruppennummer}/{database}".format(
                rechnername=rechnername,
                gruppennummer=re.match(r"([a-z]+)([0-9]+)", username,
                                       re.I).groups()[1],
                database=database
                #user=username.strip()
            ),
            {
                'user': username,
                'password': password,
                # securityMechanism 3 = user id + password (DB2 JDBC).
                'securityMechanism': "3"
            },
            os.path.join(os.getcwd(), 'jdbc-1.0.jar')
        )
        #conn.autocommit = False
        return conn
    except Exception as e:
        print(e)
def __init__(self, extractor='DefaultExtractor', **kwargs):
    """Fetch or accept HTML, then run the given boilerpipe extractor on it.

    :param extractor: boilerpipe extractor class name
    :param kwargs: 'url' to fetch (10s timeout), or 'html' with raw markup
    :raises Exception: if neither 'url' nor 'html' is provided

    Bug fix: the lock was acquired inside ``try`` after code that can raise,
    so ``finally: lock.release()`` could release an unheld lock; ``with lock``
    acquires first. Also replaced ``== False`` with ``not``.
    """
    if kwargs.get('url'):
        request = urllib2.Request(kwargs['url'], headers=self.headers)
        connection = urllib2.urlopen(request, timeout=10)
        self.data = connection.read()
        encoding = connection.headers['content-type'].lower().split(
            'charset=')[-1]
        if encoding.lower() == 'text/html':
            # No charset in the header: sniff it; charade may return None.
            encoding = charade.detect(self.data)['encoding']
            if encoding is None:
                encoding = 'utf-8'
        self.data = str(self.data, encoding, errors='ignore')
    elif kwargs.get('html'):
        self.data = kwargs['html']
        if not isinstance(self.data, str):
            self.data = str(self.data, charade.detect(self.data)['encoding'])
    else:
        raise Exception('No text or url provided')
    # make it thread-safe
    if threading.activeCount() > 1:
        if not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()
    with lock:
        self.extractor = jpype.JClass("de.l3s.boilerpipe.extractors." +
                                      extractor).INSTANCE
    reader = StringReader(self.data)
    self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
    self.extractor.process(self.source)
def _jdbc_connect_jpype(jclassname, additional_classpath, *args): import jpype # jpype.startJVM(jpype.getDefaultJVMPath(), '-Djava.class.path=C:/az_daten/workspaces/oksc-1.1.30-snapshot/oksc-selenium/src/test/resources/ojdbc14-10.2.0.3.0.jar') if not jpype.isJVMStarted(): # make vm classpath arg if (additional_classpath is not None): additional_classpath = '-Djava.class.path=' + additional_classpath print('starting jvm with ' + additional_classpath) jpype.startJVM(jpype.getDefaultJVMPath(), additional_classpath) if not jpype.isThreadAttachedToJVM(): jpype.attachThreadToJVM() if _converters is None: types = jpype.java.sql.Types types_map = {} for i in types.__javaclass__.getClassFields(): types_map[i.getName()] = i.getStaticAttribute() _init_converters(types_map) global _java_array_byte if _java_array_byte is None: def _java_array_byte(data): return jpype.JArray(jpype.JByte, 1)(data) # register driver for DriverManager jpype.JClass(jclassname) return jpype.java.sql.DriverManager.getConnection(*args)
def _jdbc_connect_jpype(jclassname, *args):
    """Open a JDBC connection through JPype, bootstrapping the JVM on first use.

    :param jclassname: JDBC driver class name (registered via JClass import)
    :param args: forwarded to DriverManager.getConnection
    :return: a java.sql.Connection
    """
    import jpype
    # Make sure the JVM is running and this thread is attached before any
    # java.* access.
    if not jpype.isJVMStarted():
        jpype.startJVM(jpype.getDefaultJVMPath())
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    # Build the SQL type-name -> converter mapping exactly once.
    if _converters is None:
        fields = jpype.java.sql.Types.__javaclass__.getClassFields()
        _init_converters({f.getName(): f.getStaticAttribute() for f in fields})
    global _java_array_byte
    if _java_array_byte is None:
        def _java_array_byte(data):
            return jpype.JArray(jpype.JByte, 1)(data)
    # Importing the driver class registers it with DriverManager.
    jpype.JClass(jclassname)
    return jpype.java.sql.DriverManager.getConnection(*args)
def correlation_basedfeat_sel(bunch):
    """Correlation-based Feature Subset Selection, as implemented by the
    CfsSubsetEval class of Weka

    :param bunch: dataset
    :return: new dataset
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    # Instances data!
    instances = utilities.convertBunchToWekaInstances(bunch)
    Filter = jp.JClass('weka.filters.Filter')
    selection_filter = jp.JClass(
        'weka.filters.supervised.attribute.AttributeSelection')()
    # CFS subset evaluator searched with BestFirst.
    evaluator = jp.JClass('weka.attributeSelection.CfsSubsetEval')()
    search = jp.JClass('weka.attributeSelection.BestFirst')()
    selection_filter.setEvaluator(evaluator)
    selection_filter.setSearch(search)
    selection_filter.setInputFormat(instances)
    filtered = Filter.useFilter(instances, selection_filter)
    return utilities.convertWekaInstancesToBunch(filtered)
def __init__(self, jvm_started=False, parse_datetime=False,
             minimum_heap_size='128m', maximum_heap_size='2048m'):
    """Initializes Duckling.

    :param jvm_started: skip JVM startup when True
    :param parse_datetime: stored for later use
    :param minimum_heap_size: JVM -Xms value
    :param maximum_heap_size: JVM -Xmx value

    Bug fixes: ``isThreadAttachedToJVM() is not 1`` was an identity
    comparison against an int; the lock was acquired inside ``try`` after
    code that can raise, so ``finally`` could release an unheld lock.
    """
    self.parse_datetime = parse_datetime
    self._is_loaded = False
    self._lock = threading.Lock()
    if not jvm_started:
        self._classpath = self._create_classpath()
        self._start_jvm(minimum_heap_size, maximum_heap_size)
    # make it thread-safe
    if threading.activeCount() > 1:
        if not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()
    with self._lock:
        self.clojure = jpype.JClass('clojure.java.api.Clojure')
        # require the duckling Clojure lib
        require = self.clojure.var("clojure.core", "require")
        require.invoke(self.clojure.read("duckling.core"))
def import_dataset_from_arff(arff, class_index=None): '''Imports Dataset From an ARFF Textual Format :param arff: the data in ARFF textual format :param classIndex: the index of the class attribute :return: a dataset (Bunch) ''' if not jp.isThreadAttachedToJVM(): jp.attachThreadToJVM() tmp = common.TemporaryFile(suffix='.arff') tmp.writeString(arff) source = jp.JClass('weka.core.converters.ConverterUtils$DataSource')( tmp.name) instances = source.getDataSet() if class_index is None: print 'Warning: class is set to the last attribute!' class_index = instances.numAttributes() - 1 elif class_index == -1: class_index = instances.numAttributes() - 1 instances.setClassIndex(class_index) return convert_weka_instances_to_bunch(instances)
def _start_jvm(cls, jvm_path, jvm_options, driver_path):
    # Start the JVM (once per process) with the Athena JDBC driver on the
    # classpath, then expose the driver jar to this thread's context
    # classloader so DriverManager can find it.
    if jvm_path is None:
        jvm_path = jpype.get_default_jvm_path()
    if driver_path is None:
        driver_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), ATHENA_JAR)
    if not jpype.isJVMStarted():
        _logger.debug('JVM path: %s', jvm_path)
        args = ['-server', '-Djava.class.path={0}'.format(driver_path)]
        if jvm_options:
            args.extend(jvm_options)
        _logger.debug('JVM args: %s', args)
        jpype.startJVM(jvm_path, *args)
        # Remember the loader created at startup for reuse by later threads.
        cls.class_loader = jpype.java.lang.Thread.currentThread(
        ).getContextClassLoader()
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    if not cls.class_loader:
        cls.class_loader = jpype.java.lang.Thread.currentThread(
        ).getContextClassLoader()
    # Wrap the driver jar in a URLClassLoader rooted at the saved loader.
    class_loader = jpype.java.net.URLClassLoader.newInstance(
        [jpype.java.net.URL('jar:file:{0}!/'.format(driver_path))],
        cls.class_loader)
    jpype.java.lang.Thread.currentThread().setContextClassLoader(
        class_loader)
def correlation_basedfeat_sel(bunch):
    """Correlation-based Feature Subset Selection, as implemented by the
    CfsSubsetEval class of Weka

    :param bunch: dataset
    :return: new dataset
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    # Convert the Bunch into WEKA Instances for the filter chain.
    weka_data = utilities.convertBunchToWekaInstances(bunch)
    # Attribute-selection filter = CFS evaluator + BestFirst search.
    attribute_selection = jp.JClass(
        'weka.filters.supervised.attribute.AttributeSelection')()
    attribute_selection.setEvaluator(
        jp.JClass('weka.attributeSelection.CfsSubsetEval')())
    attribute_selection.setSearch(
        jp.JClass('weka.attributeSelection.BestFirst')())
    attribute_selection.setInputFormat(weka_data)
    Filter = jp.JClass('weka.filters.Filter')
    reduced = Filter.useFilter(weka_data, attribute_selection)
    return utilities.convertWekaInstancesToBunch(reduced)
def _get_connection(self, server, user, password, service, jdbc_driver, tags):
    # Connect to Oracle, preferring the native cx_Oracle client and falling
    # back to JDBC (jaydebeapi over jpype) when the instant client is absent.
    # Emits an OK/CRITICAL service check either way.
    try:
        # Check if the instantclient is available
        cx_Oracle.clientversion()
    except cx_Oracle.DatabaseError as e:
        # Fallback to JDBC
        use_oracle_client = False
        self.log.debug(
            'Oracle instant client unavailable, falling back to JDBC: {}'.
            format(e))
        connect_string = self.JDBC_CONNECT_STRING.format(server, service)
    else:
        use_oracle_client = True
        self.log.debug('Running cx_Oracle version {0}'.format(
            cx_Oracle.version))
        connect_string = self.CX_CONNECT_STRING.format(
            user, password, server, service)
    try:
        if use_oracle_client:
            con = cx_Oracle.connect(connect_string)
        else:
            try:
                # jaydebeapi/jpype: attach this thread if the JVM is already
                # running (e.g. started by another check).
                if jpype.isJVMStarted(
                ) and not jpype.isThreadAttachedToJVM():
                    jpype.attachThreadToJVM()
                    jpype.java.lang.Thread.currentThread(
                    ).setContextClassLoader(
                        jpype.java.lang.ClassLoader.getSystemClassLoader())
                con = jdb.connect(self.ORACLE_DRIVER_CLASS, connect_string,
                                  [user, password], jdbc_driver)
            except jpype.JException(jpype.java.lang.RuntimeException) as e:
                # Driver class missing -> explain how to install either
                # client, then re-raise.
                if "Class {} not found".format(
                        self.ORACLE_DRIVER_CLASS) in str(e):
                    msg = """Cannot run the Oracle check until either the Oracle instant client or the JDBC Driver is available.
For the Oracle instant client, see:
http://www.oracle.com/technetwork/database/features/instant-client/index.html
You will also need to ensure the `LD_LIBRARY_PATH` is also updated so the libs are reachable.

For the JDBC Driver, see:
http://www.oracle.com/technetwork/database/application-development/jdbc/downloads/index.html
You will also need to ensure the jar is either listed in your $CLASSPATH or in the yaml configuration file of the check.
"""
                    self.log.error(msg)
                raise
        self.log.debug("Connected to Oracle DB")
        self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK, tags=tags)
    except Exception as e:
        self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL,
                           tags=tags)
        self.log.error(e)
        raise
    return con
def __init__(self, jsrc_path):
    """
    :param jsrc_path: (str) Path that contains compiled reeb_graph java
        project (https://github.com/dbespalov/reeb_graph)

    Bug fix: the working directory was changed to /tmp and only restored on
    the success path — an exception in erg.main/crg.main left the process in
    /tmp. The chdir is now wrapped in try/finally.
    """
    self.jsrc_path = jsrc_path
    if not jpype.isJVMStarted():
        jpype.startJVM(classpath=[jsrc_path], convertStrings=True)
    elif not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    # These imports are activated by jpype after starting the JVM
    from java.lang import System
    from java.io import PrintStream, File
    # Disable java output.
    System.setOut(PrintStream(
        File('/dev/null')))  # NUL for windows, /dev/null for unix
    self.erg = jpype.JClass('ExtractReebGraph')()
    self.crg = jpype.JClass('CompareReebGraph')()
    # Set defaults
    self.params = ['4000', '0.005', str(2**7), '0.5']
    # the reeb project tries to save a file in the working directory ->
    # redirect to tmp briefly, restoring the cwd even on failure
    wd = Path.cwd()
    os.chdir('/tmp')
    try:
        self.erg.main(self.params[:3])
        self.crg.main(self.params)
        try:
            (Path.cwd() / 'log_{}_{}_{}_{}'.format(*self.params)).unlink()
        except FileNotFoundError:
            pass
    finally:
        os.chdir(str(wd))
def setup_conn():
    """Open a cursor on the TSTMOVE Oracle schema via JDBC, attaching to or
    starting the JVM as required. Returns None if the connection fails."""
    try:
        import jaydebeapi
        import jpype
        if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
            # JVM already running in another thread: attach and reuse its
            # system classloader.
            jpype.attachThreadToJVM()
            jpype.java.lang.Thread.currentThread().setContextClassLoader(
                jpype.java.lang.ClassLoader.getSystemClassLoader())
        else:
            # First use in this process: start the JVM with the Oracle driver.
            jpype.startJVM(jpype.getDefaultJVMPath(),
                           '-Djava.class.path=C:\ojdbc10.jar')
        con = jaydebeapi.connect(
            "oracle.jdbc.driver.OracleDriver",
            "jdbc:oracle:thin:@wmsdbtst01.sager.com:1521:MV10TST",
            ["TSTMOVE", "TSTMOVE"])
        return con.cursor()
    except Exception as e:
        print(e)
        print('no connection')
        return None
def __init__(self, extractor='DefaultExtractor', **kwargs):
    # Load HTML from a URL, a raw string, or a file, decode it to unicode
    # (with gbk-oriented fallbacks), then run the boilerpipe extractor.
    # NOTE(review): lock.acquire() sits inside the try after code that can
    # raise, so the finally may release an unheld lock — confirm and fix.
    if kwargs.get('url'):
        request = urllib2.Request(kwargs['url'], headers=self.headers)
        connection = urllib2.urlopen(request)
        self.data = connection.read()
        encoding = connection.headers['content-type'].lower().split('charset=')[-1]
        if encoding.lower() == 'text/html':
            # No charset in the header: sniff it from the payload.
            encoding = charade.detect(self.data)['encoding']
        # self.data = unicode(self.data, 'gbk')
        #self.data = self.data.decode(encoding, 'ignore')
        try:
            self.data = unicode(self.data, charade.detect(self.data)['encoding'])
        except UnicodeError:
            # Strict decode failed: fall back to a lossy decode.
            encoding = charade.detect(self.data)['encoding']
            self.data = self.data.decode(encoding, 'ignore')
    elif kwargs.get('html'):
        self.data = kwargs['html']
        if not isinstance(self.data, unicode):
            try:
                # Site-specific default: try gbk first.
                self.data = unicode(self.data,'gbk')
                #self.data = unicode(self.data, charade.detect(self.data)['encoding'])
                #try:
                #    self.data = unicode(self.data, charade.detect(self.data)['encoding'])
            except UnicodeError:
                encoding = charade.detect(self.data)['encoding']
                print "charset is :",encoding
                self.data = self.data.decode(encoding, 'ignore')
    ## Extractor(extractor='ArticleExtractor',file='/tmp/a.html')
    elif kwargs.get('file'):
        Path = kwargs['file']
        f = open(Path, 'r')
        self.data = f.read()
        f.close()
        if not isinstance(self.data, unicode):
            try:
                self.data = unicode(self.data, charade.detect(self.data)['encoding'])
            except UnicodeError:
                encoding = charade.detect(self.data)['encoding']
                self.data = self.data.decode(encoding, 'ignore')
    else:
        raise Exception('No text or url provided')
    try:
        # make it thread-safe
        if threading.activeCount() > 1:
            if jpype.isThreadAttachedToJVM() == False:
                jpype.attachThreadToJVM()
        lock.acquire()
        self.extractor = jpype.JClass(
            "de.l3s.boilerpipe.extractors."+extractor).INSTANCE
    finally:
        lock.release()
    # Parse the decoded document and run the extractor over it.
    reader = StringReader(self.data)
    self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
    self.extractor.process(self.source)
def main(args, options):
    # Parse each input sentence with the Stanford parser and emit MLN
    # evidence databases (dependency literals + POS atoms), joined by '---'.
    #===========================================================================
    # Load the NL parsing MLN
    #===========================================================================
    mln = MLN(mlnfile=os.path.join(prac.locations.pracmodules, 'nl_parsing',
                                   'mln', 'predicates.mln'),
              grammar='PRACGrammar', logic='FuzzyLogic')
    #===========================================================================
    # Load the Java VM
    #===========================================================================
    if not java.isJvmRunning():
        java.initJvm()
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    #===========================================================================
    # # suppress the stderr outputs from the parser
    #===========================================================================
    jpype.java.lang.System.setErr(jpype.java.io.PrintStream(os.devnull))
    #===========================================================================
    # Initialize the parser
    #===========================================================================
    stanford_parser = StanfordParser(grammar_path)
    dbs = []
    sentences = args
    for s in sentences:
        db = ''
        deps = stanford_parser.get_dependencies(json.loads(s), True)
        deps = map(str, deps)
        words = set()
        for d in deps:
            # replace : by _ in stanford predicates
            res = re.match('(!?)(.+)\((.+)\)$', d)
            if res:
                d = '{}{}({})'.format(res.group(1),
                                      res.group(2).replace(':', '_'),
                                      res.group(3))
            # NOTE(review): this rebinds the function parameter `args` —
            # harmless only because `sentences` was captured above.
            _, pred, args = mln.logic.parse_literal(str(d))
            words.update(args)
            db += '{}({})\n'.format(pred, ', '.join(args))
        postags = stanford_parser.get_pos()
        pos = []
        # NOTE(review): `pos` is immediately shadowed by the loop variable,
        # and `pos.append(...)` mutates the dict's value list while
        # `postags[pos[0]] = pos[1]` mutates the dict during iteration —
        # this whole loop looks buggy and needs a rewrite after confirming
        # the intended semantics.
        for pos in postags.values():
            if not pos[0] in words:
                continue
            postagatom = 'has_pos({},{})'.format(pos[0], pos[1])
            pos.append(postagatom)
            db += '{}\n'.format(postagatom)
            postags[pos[0]] = pos[1]
        dbs.append(db)
    result = '---\n'.join(dbs)
    if options.outfile is not None:
        with open(options.outfile, 'w+') as f:
            f.write(result)
    else:
        print result
def deserialize_weka_object(objString):
    """Reconstruct a WEKA object from its base64-encoded serialized form.

    :param objString: base64 string produced by serialize_weka_object
    :return: the deserialized WEKA (Java) object
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    helper = jp.JClass('weka.core.SerializationHelper')
    # SerializationHelper reads from a file path, so round-trip via temp file.
    tmp = TemporaryFile(flags='wb+')
    tmp.writeString(b64decode(objString))
    return helper.read(tmp.name)
def serialize_weka_object(obj):
    """Serialize a WEKA object and return it as a base64 string.

    :param obj: any serializable WEKA (Java) object
    :return: base64-encoded serialized bytes
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    helper = jp.JClass('weka.core.SerializationHelper')
    # SerializationHelper writes to a file path, so round-trip via temp file.
    tmp = TemporaryFile(flags='wb+')
    helper.write(tmp.name, obj)
    return b64encode(tmp.fp.read())
def print_classifier(self):
    """Return the textual representation of this widget's WEKA classifier.

    :return: classifier.toString() of the deserialized model
    :raises Exception: if self.sclassifier is not a valid WEKA object

    Fix vs. original: the bare ``except:`` (which also swallowed
    KeyboardInterrupt/SystemExit) is narrowed to ``except Exception``.
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        classifier = common.deserialize_weka_object(self.sclassifier)
        return classifier.toString()
    except Exception:
        raise Exception("Only WEKA classifiers/models supported. Please provide a valid WEKA learner.")
def get_jdbc_connection(iotdbIp, iotdbUser, iotdbPassword):
    """Open a JDBC connection to an IoTDB instance.

    :param iotdbIp: JDBC URL / host of the IoTDB server
    :param iotdbUser: user name
    :param iotdbPassword: password
    :return: a jaydebeapi connection
    """
    # If another thread already started the JVM, attach and reuse its
    # system classloader so the driver jar resolves.
    if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
        jpype.java.lang.Thread.currentThread().setContextClassLoader(
            jpype.java.lang.ClassLoader.getSystemClassLoader())
    return JDBC.connect('org.apache.iotdb.jdbc.IoTDBDriver',
                        iotdbIp,
                        [iotdbUser, iotdbPassword],
                        'iotdb-jdbc-0.11.3-SNAPSHOT-jar-with-dependencies.jar')
def __init__(self, extractor='DefaultExtractor', **kwargs):
    '''Fetch or accept HTML, decode it, and run a boilerpipe extractor.

    :param extractor: boilerpipe extractor class name
    :param kwargs: either 'url' (page is downloaded) or 'html' (raw markup);
        'kMin' is honored for KeepEverythingWithMinKWordsExtractor
    :raises Exception: if neither 'url' nor 'html' is given
    '''
    if kwargs.get('url'):
        request = Request(kwargs['url'], headers=self.headers)
        connection = urlopen(request)
        self.data = connection.read()
        encoding = connection.headers['content-type'].lower().split(
            'charset=')[-1]
        if encoding.lower() == 'text/html':
            # no charset in the header: sniff the bytes instead
            encoding = chardet.detect(self.data)['encoding']
        # best-effort gunzip; if the payload isn't gzipped, keep it as-is
        try:
            import gzip
            import StringIO
            data = StringIO.StringIO(self.data)
            gzipper = gzip.GzipFile(fileobj=data)
            self.data = gzipper.read()
        except Exception:
            pass
        # py2: unicode() exists; py3: NameError, fall back to bytes.decode
        try:
            self.data = unicode(self.data, encoding)
        except NameError:
            self.data = self.data.decode(encoding)
    elif kwargs.get('html'):
        self.data = kwargs['html']
        try:
            if not isinstance(self.data, unicode):
                self.data = unicode(self.data,
                                    chardet.detect(self.data)['encoding'])
        except NameError:
            if not isinstance(self.data, str):
                self.data = self.data.decode(
                    chardet.detect(self.data)['encoding'])
    else:
        raise Exception('No text or url provided')
    try:
        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()
        lock.acquire()
        if extractor == "KeepEverythingWithMinKWordsExtractor":
            kMin = kwargs.get("kMin", 1)  # set default to 1
            self.extractor = jpype.JClass(
                "de.l3s.boilerpipe.extractors." + extractor)(kMin)
        else:
            self.extractor = jpype.JClass(
                "de.l3s.boilerpipe.extractors." + extractor).INSTANCE
    finally:
        lock.release()
    reader = StringReader(self.data)
    self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
    self.extractor.process(self.source)
def __init__(self, extractor='DefaultExtractor', **kwargs): if kwargs.get('url'): request = urllib2.Request(kwargs['url'], headers=self.headers) # Version without headers # request = urllib2.Request(kwargs['url']) connection = urllib2.urlopen(request) self.data = connection.read() encoding = connection.headers['content-type'].lower().split('charset=')[-1] # Try requests # request = requests.get(kwargs['url'], headers=self.headers, verify=False) # self.data = request.text # encoding = request.headers['content-type'].lower().split('charset=')[-1] if encoding.lower() == 'text/html': encoding = charade.detect(self.data)['encoding'] try: self.data = unicode(self.data, encoding, errors='replace') except LookupError as e: print e import ipdb; ipdb.set_trace() # XXX BREAKPOINT elif kwargs.get('html'): self.data = kwargs['html'] if not isinstance(self.data, unicode): self.data = unicode(self.data, charade.detect(self.data)['encoding'], errors='replace') import ipdb; ipdb.set_trace() # XXX BREAKPOINT else: raise Exception('No text or url provided') try: # make it thread-safe if threading.activeCount() > 1: if jpype.isThreadAttachedToJVM() == False: jpype.attachThreadToJVM() lock.acquire() self.extractor = jpype.JClass( "de.l3s.boilerpipe.extractors."+extractor).INSTANCE finally: lock.release() reader = StringReader(self.data) self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument() self.extractor.process(self.source)
def attach_thread_to_jvm() -> None:
    """Attach the current thread to a running JVM, if needed.

    See https://github.com/baztian/jaydebeapi/issues/14#issuecomment-261489331
    """
    import jpype
    if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
        current = jpype.java.lang.Thread.currentThread()
        current.setContextClassLoader(
            jpype.java.lang.ClassLoader.getSystemClassLoader())
def apply_mapped_classifier_get_instances(weka_classifier, original_data, data):
    '''An advanced version of the Apply Classifier method. Addresses incompatible
    training and test data, and returns a dataset with predictions.

    :param weka_classifier: WekaClassifier object
    :param original_data: original training instances, bunch
    :param data: test instances, bunch

    :return: Dataset (Bunch) object with predictions and a textual report
        from the InputMappedClassifier class
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    try:
        classifier = common.deserialize_weka_object(
            weka_classifier.sclassifier)
    # BUGFIX: was a bare `except:`; narrowed to Exception.
    except Exception:
        raise Exception(
            "Only WEKA classifiers/models supported. Please provide a valid WEKA learner."
        )
    original_training_instances = ut.convert_bunch_to_weka_instances(
        original_data)
    instances = ut.convert_bunch_to_weka_instances(data)
    # InputMappedClassifier loads its model from disk, so serialize the
    # classifier together with the original training header once again.
    tfile = common.TemporaryFile(flags='wb+')
    s = jp.JClass('weka.core.SerializationHelper')
    s.writeAll(tfile.name, [classifier, original_training_instances])
    # construct a MappedClassifier that tolerates name/order mismatches
    mapped_classifier = jp.JClass(
        'weka.classifiers.misc.InputMappedClassifier')()
    mapped_classifier.setIgnoreCaseForNames(True)
    mapped_classifier.setTrim(True)
    mapped_classifier.setModelPath(tfile.name)
    predictions = []
    try:
        for instance in instances:
            predictions.append(int(mapped_classifier.classifyInstance(instance)))
        data["targetPredicted"] = predictions
    # BUGFIX: was a bare `except:`; narrowed to Exception.
    except Exception:
        raise Exception(
            "Classifier not built. Please use the Build Classifier widget first."
        )
    report = mapped_classifier.toString()
    # keep only the attribute-mapping section of the report, if present
    if MAPPING_REPORT_START in report:
        report = report[report.index(MAPPING_REPORT_START):]
    return data, report
def __init__(self, extractor='DefaultExtractor', **kwargs):
    '''Fetch or accept HTML (with IDNA handling for Cyrillic hosts),
    decode it, and run a boilerpipe extractor.

    :param extractor: boilerpipe extractor class name
    :param kwargs: either 'url' (page is downloaded) or 'html' (raw markup)
    :raises Exception: if neither 'url' nor 'html' is given, or the page
        body cannot be read
    '''
    if kwargs.get('url'):
        # Correctly encode url (punycode hosts containing Cyrillic chars)
        url = unicode(kwargs['url'])
        if re_rus.search(url):
            url = re_http.sub("", url)
            url = re_slash.sub("", url)
            url = url.encode("idna")
            url = "http://" + url
        # Set header
        h = {'User-Agent': self.headers[0], 'Accept': '*/*'}
        # Download the page
        request = urllib2.Request(url, headers=h)
        connection = urllib2.urlopen(request)
        self.data = connection.read()
        encoding = connection.headers['content-type'].lower().split(
            'charset=')[-1]
        # Decode the page contents in the correct encoding
        if self.data is None:
            raise Exception('Html data cannot be extracted.')
        if encoding.lower() == 'text/html':
            # no charset in the header: sniff, then sanitize the sniffed name
            encoding = charade.detect(self.data)['encoding']
            # BUGFIX: dropped dead assignment `old = encoding` (never used)
            encoding = re_enc_error.sub("", encoding)
            encoding = re_enc_error2.sub("", encoding)
            encoding = re_enc_win.sub("windows-1251", encoding)
            if re_enc_def.search(encoding):
                encoding = DEFAULT_ENCODING
        self.data = unicode(self.data, encoding, "ignore")
        connection.close()
    elif kwargs.get('html'):
        self.data = kwargs['html']
        if not isinstance(self.data, unicode):
            self.data = unicode(self.data,
                                charade.detect(self.data)['encoding'])
    else:
        raise Exception('No text or url provided')
    try:
        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()
        lock.acquire()
        self.extractor = jpype.JClass("de.l3s.boilerpipe.extractors." +
                                      extractor).INSTANCE
    finally:
        lock.release()
    reader = StringReader(self.data)
    self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
    self.extractor.process(self.source)
def weka_local_jrip(input_dict):
    '''The RIPPER rule learner by Weka.

    :param input_dict: dict with Weka CLI options under 'params'
    :return: dict with the serialized learner under 'JRip_learner'
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    learner = jp.JClass('weka.classifiers.rules.JRip')()
    learner.setOptions(common.parse_options(input_dict['params']))
    return {'JRip_learner': common.serialize_weka_object(learner)}
def weka_local_k_star(input_dict):
    '''Instance-Based learner K* by Weka.

    :param input_dict: dict with Weka CLI options under 'params'
    :return: dict with the serialized learner under 'KStar_learner'
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    learner = jp.JClass('weka.classifiers.lazy.KStar')()
    learner.setOptions(common.parse_options(input_dict['params']))
    return {'KStar_learner': common.serialize_weka_object(learner)}
def weka_local_zeror(input_dict):
    '''Weka's rulesZeroR classifier: predicts the mean (for a numeric class)
    or the mode (for a nominal class).

    :param input_dict: dict with Weka CLI options under 'params'
    :return: dict with the serialized learner under 'classifier'
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    learner = jp.JClass('weka.classifiers.rules.ZeroR')()
    learner.setOptions(common.parse_options(input_dict['params']))
    return {'classifier': common.serialize_weka_object(learner)}
def weka_local_naive_bayes(input_dict):
    '''Naive Bayes classifier provided by Weka. Naive Bayes is a simple
    probabilistic classifier based on applying the Bayes' theorem.

    :param input_dict: dict with Weka CLI options under 'params'
    :return: dict with the serialized learner under 'Naive_Bayes_learner'
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    learner = jp.JClass('weka.classifiers.bayes.NaiveBayes')()
    learner.setOptions(common.parse_options(input_dict['params']))
    return {'Naive_Bayes_learner': common.serialize_weka_object(learner)}
def weka_local_random_forest(input_dict):
    '''Random Forest learner by Weka.

    :param input_dict: dict with Weka CLI options under 'params'
    :return: dict with the serialized learner under 'RandomForest_learner'
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    learner = jp.JClass('weka.classifiers.trees.RandomForest')()
    learner.setOptions(common.parse_options(input_dict['params']))
    return {'RandomForest_learner': common.serialize_weka_object(learner)}
def weka_local_rep_tree(input_dict):
    '''A REP Tree, which is a fast decision tree learner. Builds a
    decision/regression tree using information gain/variance and prunes it
    using reduced-error pruning.

    :param input_dict: dict with Weka CLI options under 'params'
    :return: dict with the serialized learner under 'REPTree_learner'
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    learner = jp.JClass('weka.classifiers.trees.REPTree')()
    learner.setOptions(common.parse_options(input_dict['params']))
    return {'REPTree_learner': common.serialize_weka_object(learner)}
def weka_local_multilayer_perceptron(input_dict):
    '''Feedforward artificial neural network, using backpropagation to
    classify instances.

    :param input_dict: dict with Weka CLI options under 'params'
    :return: dict with the serialized learner under
        'Multilayer_Perceptron_learner'
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    learner = jp.JClass('weka.classifiers.functions.MultilayerPerceptron')()
    learner.setOptions(common.parse_options(input_dict['params']))
    return {'Multilayer_Perceptron_learner': common.serialize_weka_object(learner)}
def weka_local_smo(input_dict):
    '''A support vector classifier, trained using the Sequential Minimal
    Optimization (SMO) algorithm.

    :param input_dict: dict with Weka CLI options under 'params'
    :return: dict with the serialized learner under 'SMO_learner'
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    learner = jp.JClass('weka.classifiers.functions.SMO')()
    learner.setOptions(common.parse_options(input_dict['params']))
    return {'SMO_learner': common.serialize_weka_object(learner)}
def weka_local_j48(input_dict):
    '''Weka decision tree learner J48.

    :param input_dict: dict with Weka CLI options under 'params'
    :return: dict with the serialized learner under 'J48_learner'
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    learner = jp.JClass('weka.classifiers.trees.J48')()
    learner.setOptions(common.parse_options(input_dict['params']))
    return {'J48_learner': common.serialize_weka_object(learner)}
def weka_local_random_tree(input_dict):
    '''A tree that considers K randomly chosen attributes at each node, and
    performs no pruning.

    :param input_dict: dict with Weka CLI options under 'params'
    :return: dict with the serialized learner under 'RandomTree_learner'
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()
    learner = jp.JClass('weka.classifiers.trees.RandomTree')()
    learner.setOptions(common.parse_options(input_dict['params']))
    return {'RandomTree_learner': common.serialize_weka_object(learner)}
def __getattribute__(self, name):
    """Lazily boot the JVM, attach the current thread, and run one-time
    initialization before any attribute access on this object.

    Uses object.__getattribute__ directly to avoid re-entering this hook.
    """
    ga = object.__getattribute__
    if not jpype.isJVMStarted():
        ga(self, 'init_JVM')()
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    if not ga(self, 'initialized'):
        ga(self, 'initialize')()
    return ga(self, name)
def __init__(self, extractor='DefaultExtractor', **kwargs):
    '''Fetch or accept HTML (with IDNA handling for Cyrillic hosts),
    decode it, and run a boilerpipe extractor.

    :param extractor: boilerpipe extractor class name
    :param kwargs: either 'url' (page is downloaded) or 'html' (raw markup)
    :raises Exception: if neither 'url' nor 'html' is given, or the page
        body cannot be read
    '''
    if kwargs.get('url'):
        # Correctly encode url (punycode hosts containing Cyrillic chars)
        url = unicode(kwargs['url'])
        if re_rus.search(url):
            url = re_http.sub("", url)
            url = re_slash.sub("", url)
            url = url.encode("idna")
            url = "http://" + url
        # Set header
        h = {'User-Agent':self.headers[0], 'Accept':'*/*'}
        # Download the page
        request = urllib2.Request(url, headers=h)
        connection = urllib2.urlopen(request)
        self.data = connection.read()
        encoding = connection.headers['content-type'].lower().split('charset=')[-1]
        # Decode the page contents in the correct encoding
        if self.data is None:
            raise Exception('Html data cannot be extracted.')
        if encoding.lower() == 'text/html':
            # no charset in the header: sniff, then sanitize the sniffed name
            encoding = charade.detect(self.data)['encoding']
            # BUGFIX: dropped dead assignment `old = encoding` (never used)
            encoding = re_enc_error.sub("", encoding)
            encoding = re_enc_error2.sub("", encoding)
            encoding = re_enc_win.sub("windows-1251", encoding)
            if re_enc_def.search(encoding):
                encoding = DEFAULT_ENCODING
        self.data = unicode(self.data, encoding, "ignore")
        connection.close()
    elif kwargs.get('html'):
        self.data = kwargs['html']
        if not isinstance(self.data, unicode):
            self.data = unicode(self.data,
                                charade.detect(self.data)['encoding'])
    else:
        raise Exception('No text or url provided')
    try:
        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()
        lock.acquire()
        self.extractor = jpype.JClass(
            "de.l3s.boilerpipe.extractors."+extractor).INSTANCE
    finally:
        lock.release()
    reader = StringReader(self.data)
    self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
    self.extractor.process(self.source)