예제 #1
0
    def wrapper(*args, **kwargs):
        """Call the wrapped ``function`` with this thread attached to the JVM.

        The thread is attached on entry if it is not attached already and is
        always detached again once the call has finished, whether the wrapped
        function returned normally or raised.

        Returns:
            Whatever ``function(*args, **kwargs)`` returns.

        Raises:
            RuntimeError: if the thread is still not attached after the
                attach attempt.
        """
        # Sanity check before we start using JPype
        if not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()

        # Fail loudly instead of falling through to an unbound return value.
        if not jpype.isThreadAttachedToJVM():
            raise RuntimeError('Unable to attach the current thread to the JVM')

        try:
            return function(*args, **kwargs)
        finally:
            # Detach from the JVM after we are done, even on error
            jpype.detachThreadFromJVM()
예제 #2
0
def weka_local_arff_to_weka_instances(input_dict):
    '''
    Reads a dataset into a format suitable for WEKA methods

    The ARFF text in ``input_dict['arff']`` is written to a temporary
    ``.arff`` file and loaded through WEKA's ``ConverterUtils$DataSource``.

    :param input_dict: dictionary with the key ``'arff'`` (ARFF text) and an
        optional ``'class_index'`` entry convertible to ``int``; a missing or
        unparsable value, as well as ``-1``, selects the last attribute
    :return: dictionary with the key ``'instances'`` holding the serialized
        WEKA instances
    '''

    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    tmp = common.TemporaryFile(suffix='.arff')
    tmp.writeString(input_dict['arff'])

    # Only the failures int()/dict lookup can produce are treated as
    # "no class index given"; anything else should propagate.
    try:
        class_index = int(input_dict['class_index'])
    except (KeyError, TypeError, ValueError):
        class_index = None

    source = jp.JClass('weka.core.converters.ConverterUtils$DataSource')(tmp.name)
    instances = source.getDataSet()

    if class_index is None:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print('Warning: class is set to the last attribute!')
    if class_index is None or class_index == -1:
        class_index = instances.numAttributes() - 1

    instances.setClassIndex(class_index)

    return {'instances': common.serialize_weka_object(instances)}
예제 #3
0
    def joystickTimerEvent(self):
        """Poll the joystick once, update the movement canvas, and re-arm the timer.

        While the joystick is active, button 0 acts as the "move" button:
        when it is held, axis 0 and the negated axis 1 are forwarded to
        ``updateMovementCanvas``, with the second value divided by 2, 4 or 6
        unless button 1 is also held.  When button 0 has just been released,
        or the joystick is inactive, the canvas is reset to ``(0.0, 0.0)``.
        """
        if self.__directConnection and not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()

        if not self.joystickActive:
            # Joystick is not active: stop any pending timer and reset state.
            if self.joystickTimer:
                self.joystickTimer.stop()
            self.joystickTimer = None
            self.joystickPrevButtonDown = False
            self.updateMovementCanvas(0.0, 0.0)
            return

        stick = self.joystick
        stick.update()

        if stick.getButton(0):
            self.joystickPrevButtonDown = True
            # Buttons are tested in the order 1, 3, 2; the first one held
            # picks the divisor applied to the second axis.
            if stick.getButton(1):
                divisor = None
            elif stick.getButton(3):
                divisor = 2.0
            elif stick.getButton(2):
                divisor = 4.0
            else:
                divisor = 6.0
            horizontal = stick.getAxis(0)
            vertical = -stick.getAxis(1)
            if divisor is not None:
                vertical = vertical / divisor
            self.updateMovementCanvas(horizontal, vertical)
        elif self.joystickPrevButtonDown:
            # Button was just recently let up.
            self.joystickPrevButtonDown = False
            self.updateMovementCanvas(0.0, 0.0)

        # NOTE(review): this stores the return value of ``start()``, not the
        # Timer object itself -- confirm that ``start()`` returns the timer.
        self.joystickTimer = Timer(self.joystickTimerPeriod, self.joystickTimerEvent).start()
예제 #4
0
def weka_local_generic_learner(input_dict):
    """Instantiate the WEKA class named in ``input_dict`` and serialize it.

    :param input_dict: dictionary with ``'weka_class'`` (Java class name
        passed to ``jp.JClass``) and ``'params'`` (option text handed to
        ``common.parse_options``)
    :return: dictionary with the key ``'Generic_Weka_learner'`` holding the
        serialized, configured object
    """
    already_attached = jp.isThreadAttachedToJVM()
    if not already_attached:
        jp.attachThreadToJVM()

    learner_class = jp.JClass(input_dict['weka_class'])
    learner = learner_class()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)

    return {'Generic_Weka_learner': common.serialize_weka_object(learner)}
예제 #5
0
def _jdbc_connect_jpype(jclassname, url, driver_args, jars, libs):
    """Open a JDBC connection through JPype, starting the JVM on first use.

    :param jclassname: name of the JDBC driver class; it is loaded with
        ``jpype.JClass`` so that it registers with ``DriverManager``
    :param url: JDBC URL passed to ``DriverManager.getConnection``
    :param driver_args: either a dict, converted to ``java.util.Properties``,
        or a sequence of extra positional arguments for ``getConnection``
    :param jars: optional iterable of class path entries added in front of
        the entries returned by ``_get_classpath()``
    :param libs: optional iterable of directories for ``java.library.path``
    :return: the object returned by ``DriverManager.getConnection``
    """
    import jpype
    global old_jpype
    if not jpype.isJVMStarted():
        args = []
        class_path = []
        if jars:
            class_path.extend(jars)
        class_path.extend(_get_classpath())
        if class_path:
            args.append('-Djava.class.path=%s' %
                        os.path.pathsep.join(class_path))
        if libs:
            # path to shared libraries
            libs_path = os.path.pathsep.join(libs)
            args.append('-Djava.library.path=%s' % libs_path)
        jvm_path = jpype.getDefaultJVMPath()
        if hasattr(jpype, '__version__'):
            # Compare (major, minor) as integers: parsing "0.10" as the
            # float 0.1 would wrongly classify it as older than 0.7.
            ver_match = re.match(r'(\d+)\.(\d+)', jpype.__version__)
            if ver_match:
                jpype_ver = tuple(int(part) for part in ver_match.groups())
                if jpype_ver < (0, 7):
                    old_jpype = True
        if old_jpype:
            jpype.startJVM(jvm_path, *args)
        else:
            jpype.startJVM(jvm_path, *args, ignoreUnrecognized=True,
                           convertStrings=True)
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    if _jdbc_name_to_const is None:
        # Build the name -> constant map of java.sql.Types once.
        types = jpype.java.sql.Types
        types_map = {}
        for i in types.__javaclass__.getClassFields():
            if old_jpype:
                const = i.getStaticAttribute()
            else:
                const = i.__get__(i)
            types_map[i.getName()] = const
        _init_types(types_map)
    global _java_array_byte
    if _java_array_byte is None:
        # Install the module-level byte-array factory on first use.
        def _java_array_byte(data):
            return jpype.JArray(jpype.JByte, 1)(data)
    # register driver for DriverManager
    jpype.JClass(jclassname)
    if isinstance(driver_args, dict):
        Properties = jpype.java.util.Properties
        info = Properties()
        for k, v in driver_args.items():
            info.setProperty(k, v)
        dargs = [info]
    else:
        dargs = driver_args
    return jpype.java.sql.DriverManager.getConnection(url, *dargs)
예제 #6
0
def weka_local_apply_classifier(input_dict):
    """Classify every instance with a serialized WEKA classifier.

    :param input_dict: dictionary with ``'instances'`` (serialized WEKA
        instances) and ``'classifier'`` (serialized classifier)
    :return: dictionary with the key ``'classes'`` holding one predicted
        label per instance, looked up on the last attribute
    :raises Exception: if deserializing or applying the classifier fails
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    instances = common.deserialize_weka_object(input_dict['instances'])

    if instances.classIndex() == -1:
        # last attribute is class
        instances.setClassIndex(instances.numAttributes() - 1)

    classifier_serialized = input_dict['classifier']
    try:
        classifier = common.deserialize_weka_object(classifier_serialized)
        # The label attribute does not change between instances, so it is
        # looked up once instead of once per prediction.
        label_attribute = instances.attribute(instances.numAttributes() - 1)
        predictions = [
            label_attribute.value(int(classifier.classifyInstance(instance)))
            for instance in instances
        ]
    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into this message.
        raise Exception(
            "Classifier not built. Please use the Build Classifier widget first."
        )

    return {'classes': predictions}
예제 #7
0
def weka_local_libsvm(input_dict):
    """Create a configured WEKA ``LibSVM`` object and serialize it.

    :param input_dict: dictionary whose ``'params'`` entry is handed to
        ``common.parse_options`` to produce the options
    :return: dictionary with the key ``'LibSVM_learner'`` holding the
        serialized object
    """
    attached = jp.isThreadAttachedToJVM()
    if not attached:
        jp.attachThreadToJVM()

    svm_class = jp.JClass('weka.classifiers.functions.LibSVM')
    svm = svm_class()
    svm.setOptions(common.parse_options(input_dict['params']))

    return {'LibSVM_learner': common.serialize_weka_object(svm)}
예제 #8
0
def weka_local_generic_learner(input_dict):
    """Build and serialize an arbitrary WEKA object chosen by class name.

    :param input_dict: dictionary providing ``'weka_class'`` (the Java class
        name given to ``jp.JClass``) and ``'params'`` (text parsed by
        ``common.parse_options``)
    :return: ``{'Generic_Weka_learner': <serialized object>}``
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    instance = jp.JClass(input_dict['weka_class'])()
    parsed_options = common.parse_options(input_dict['params'])
    instance.setOptions(parsed_options)
    serialized = common.serialize_weka_object(instance)

    result = {}
    result['Generic_Weka_learner'] = serialized
    return result
예제 #9
0
def weka_local_arff_to_weka_instances(input_dict):
    '''
    Reads a dataset into a format suitable for WEKA methods

    The ARFF text found under ``input_dict['arff']`` is written to a
    temporary ``.arff`` file, which is then loaded with WEKA's
    ``ConverterUtils$DataSource``.

    :param input_dict: dictionary with the key ``'arff'`` and, optionally,
        ``'class_index'`` (anything ``int()`` accepts); when it is missing,
        unparsable or ``-1`` the last attribute becomes the class
    :return: dictionary with the key ``'instances'`` holding the serialized
        WEKA instances
    '''

    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    tmp = common.TemporaryFile(suffix='.arff')
    tmp.writeString(input_dict['arff'])

    # Catch only what the lookup and int() conversion can raise, so that
    # unrelated errors are not silently mapped to "no class index".
    try:
        class_index = int(input_dict['class_index'])
    except (KeyError, TypeError, ValueError):
        class_index = None

    source = jp.JClass('weka.core.converters.ConverterUtils$DataSource')(
        tmp.name)
    instances = source.getDataSet()

    if class_index is None:
        # Single-argument print() behaves the same on Python 2 and 3.
        print('Warning: class is set to the last attribute!')
    if class_index is None or class_index == -1:
        class_index = instances.numAttributes() - 1

    instances.setClassIndex(class_index)

    return {'instances': common.serialize_weka_object(instances)}
예제 #10
0
def weka_local_libsvm(input_dict):
    """Return a serialized WEKA ``LibSVM`` object configured from ``input_dict``.

    :param input_dict: dictionary with a ``'params'`` entry that
        ``common.parse_options`` turns into the object's options
    :return: ``{'LibSVM_learner': <serialized object>}``
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner = jp.JClass('weka.classifiers.functions.LibSVM')()
    option_list = common.parse_options(input_dict['params'])
    learner.setOptions(option_list)
    serialized_learner = common.serialize_weka_object(learner)

    return {'LibSVM_learner': serialized_learner}
예제 #11
0
파일: kmeans.py 프로젝트: Alshak/jcl
 def __init__(self, arff_string, weights, nb_seeds, nb_steps):
     """Attach to the JVM if needed, initialize the base class, store settings.

     ``arff_string`` and ``weights`` are forwarded to
     ``Monostrategy.__init__``; ``nb_seeds`` and ``nb_steps`` are converted
     with ``int`` and kept on the instance.
     """
     thread_attached = jp.isThreadAttachedToJVM()
     if not thread_attached:
         jp.attachThreadToJVM()

     Monostrategy.__init__(self, arff_string, weights)

     seeds = int(nb_seeds)
     self.nb_seeds = seeds
     steps = int(nb_steps)
     self.nb_steps = steps
예제 #12
0
    def __init__(self, *args, **kwargs):
        """Select a boilerpipe extractor by name and optionally load content.

        At most one positional argument is accepted: the extractor class
        name, defaulting to ``'ArticleExtractor'``.  A truthy ``url`` keyword
        is passed to ``setUrl``; otherwise a truthy ``html`` keyword is
        passed to ``setHtml``.

        Raises:
            Exception: if more than one positional argument is given.
        """

        if len(args) == 0:
            extractor = 'ArticleExtractor'
        elif len(args) == 1:
            extractor = args[0]
        else:
            raise Exception('Invalid extractor param')
        self.extractor_name = extractor

        if kwargs.get('url'):
            self.setUrl(kwargs['url'])
        elif kwargs.get('html'):
            self.setHtml(kwargs['html'])

        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()

        # Acquire before entering ``try`` so that ``finally`` never releases
        # a lock this thread does not hold (e.g. when attaching fails).
        lock.acquire()
        try:
            self.extractor = jpype.JClass(
                "de.l3s.boilerpipe.extractors." + self.extractor_name).INSTANCE
        finally:
            lock.release()
    def __init__(self, extractor='DefaultExtractor', **kwargs):
        """Load a document from ``url`` or ``html`` and run an extractor on it.

        With a truthy ``url`` keyword the page is fetched with
        ``urllib2.urlopen`` and decoded using the text after ``charset=`` in
        the ``content-type`` header; when that yields ``'text/html'`` the
        encoding is detected with ``chardet`` instead.  With a truthy
        ``html`` keyword the value is used directly, decoded with a
        ``chardet``-detected encoding if it is not already ``unicode``.

        :param extractor: class name appended to
            ``de.l3s.boilerpipe.extractors.`` to locate the extractor
        :raises Exception: if neither ``url`` nor ``html`` is provided
        """
        if kwargs.get('url'):
            request   = urllib2.urlopen(kwargs['url'])
            self.data = request.read()
            encoding  = request.headers['content-type'].lower().split('charset=')[-1]
            if encoding.lower() == 'text/html':
                encoding = chardet.detect(self.data)['encoding']
            self.data = unicode(self.data, encoding)
        elif kwargs.get('html'):
            self.data = kwargs['html']
            if not isinstance(self.data, unicode):
                self.data = unicode(self.data, chardet.detect(self.data)['encoding'])
        else:
            raise Exception('No text or url provided')

        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()

        # The lock is taken outside ``try`` so ``finally`` only releases a
        # lock that was really acquired.
        lock.acquire()
        try:
            self.extractor = jpype.JClass(
                "de.l3s.boilerpipe.extractors."+extractor).INSTANCE
        finally:
            lock.release()

        reader = StringReader(self.data)
        self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
        self.extractor.process(self.source)
예제 #14
0
def _jdbc_connect_jpype(jclassname, jars, libs, *driver_args):
    """Return a JDBC connection obtained through JPype.

    The JVM is started on first use with a class path built from ``jars``
    plus ``_get_classpath()`` and, when ``libs`` is given, a matching
    ``java.library.path``.  The driver class ``jclassname`` is loaded so it
    registers with ``DriverManager``, and ``driver_args`` are forwarded to
    ``DriverManager.getConnection``.
    """
    import jpype
    global _java_array_byte

    if not jpype.isJVMStarted():
        entries = []
        if jars:
            entries += jars
        entries += _get_classpath()

        jvm_args = []
        if entries:
            joined_entries = os.path.pathsep.join(entries)
            jvm_args.append('-Djava.class.path=%s' % joined_entries)
        if libs:
            # Directories searched for shared libraries.
            jvm_args.append(
                '-Djava.library.path=%s' % os.path.pathsep.join(libs))

        jpype.startJVM(jpype.getDefaultJVMPath(), *jvm_args)

    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()

    if _jdbc_name_to_const is None:
        # Collect the java.sql.Types constants by field name.
        constants = {}
        for field in jpype.java.sql.Types.__javaclass__.getClassFields():
            constants[field.getName()] = field.getStaticAttribute()
        _init_types(constants)

    if _java_array_byte is None:
        # Define the module-level byte-array factory lazily.
        def _java_array_byte(data):
            return jpype.JArray(jpype.JByte, 1)(data)

    # Loading the class registers the driver with DriverManager.
    jpype.JClass(jclassname)
    driver_manager = jpype.java.sql.DriverManager
    return driver_manager.getConnection(*driver_args)
예제 #15
0
    def __init__(self,
                 jars=[],
                 jvm_started=False,
                 mark_time_ranges=False,
                 include_range=False):
        """Initializes GetTimeForEvt.

        :param jars: iterable of jar entries; stored as a copy on ``self.jars``
        :param jvm_started: when false, the class path is built with
            ``_create_classpath`` and the JVM is started with ``_start_jvm``
        :param mark_time_ranges: stored on the instance
        :param include_range: stored on the instance
        """
        self.mark_time_ranges = mark_time_ranges
        self.include_range = include_range
        # Copy so the shared default list is never aliased by instances.
        self.jars = list(jars)
        self._is_loaded = False
        self._lock = threading.Lock()

        if not jvm_started:
            self._classpath = self._create_classpath()
            self._start_jvm()

        # make it thread-safe
        if threading.activeCount() > 1:
            # Truthiness test instead of ``is not 1``: identity against an
            # int literal is true for a ``True`` return value as well.
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()

        # ``with`` guarantees the lock is only released after it was taken.
        with self._lock:
            EvtInfoWrapper = jpype.JClass('STFnlp.getEvtInfo')
            self._getinfo = EvtInfoWrapper()
            self._is_loaded = True
예제 #16
0
def start_jvm():
    """Start the JVM if necessary, attach this thread, and add the POI jars once.

    The jar paths are resolved relative to the ``java`` directory next to
    this module and registered through ``add_classpaths``; the module-level
    ``classpath_added`` flag prevents them from being added again.
    """
    global classpath_added

    if not jpype.isJVMStarted():
        jvm_options = ('-Dfile.encoding=UTF8', '-ea', '-Xmx1024m')
        jpype.startJVM(jpype.get_default_jvm_path(), *jvm_options)

    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()

    if classpath_added:
        return

    jar_names = (
        'poi-3.17.jar',
        'poi-excelant-3.17.jar',
        'poi-ooxml-3.17.jar',
        'poi-ooxml-schemas-3.17.jar',
        'poi-scratchpad-3.17.jar',
        'lib/commons-codec-1.10.jar',
        'lib/commons-collections4-4.1.jar',
        'lib/commons-logging-1.2.jar',
        'lib/log4j-1.2.17.jar',
        'ooxml-lib/xmlbeans-2.6.0.jar',
        'ooxml-lib/curvesapi-1.04.jar',
    )
    module_dir = os.path.dirname(__file__)
    jar_paths = []
    for jar_name in jar_names:
        jar_paths.append(f'{module_dir}/java/{jar_name}')

    add_classpaths(jar_paths)
    classpath_added = True
예제 #17
0
    def __init__(self, extractor='DefaultExtractor', **kwargs):
        """Load a document from ``url`` or ``html`` and run an extractor on it.

        With a truthy ``url`` keyword the page is fetched with ``requests``
        (sending ``self.headers``) and the response text is used.  With a
        truthy ``html`` keyword the value is used directly, decoded with a
        ``charade``-detected encoding if it is not already ``unicode``.

        :param extractor: class name appended to
            ``de.l3s.boilerpipe.extractors.`` to locate the extractor
        :raises Exception: if neither ``url`` nor ``html`` is provided
        """
        if kwargs.get('url'):
            response = requests.request('GET', kwargs['url'], headers=self.headers)
            self.data = response.text
        elif kwargs.get('html'):
            self.data = kwargs['html']
            if not isinstance(self.data, unicode):
                self.data = unicode(self.data, charade.detect(self.data)['encoding'])
        else:
            raise Exception('No text or url provided')

        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()

        # Acquire outside ``try``: if attaching raised above, ``finally``
        # must not release a lock that was never taken.
        lock.acquire()
        try:
            self.extractor = jpype.JClass(
                "de.l3s.boilerpipe.extractors."+extractor).INSTANCE
        finally:
            lock.release()

        reader = StringReader(self.data)
        self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
        self.extractor.process(self.source)
예제 #18
0
    def open(cls, connection):
        """Open a DB2 connection through jaydebeapi and attach it to ``connection``.

        If ``connection.state`` is already ``'open'`` the connection is
        returned untouched.  On success ``connection.handle`` holds the
        jaydebeapi connection and ``connection.state`` becomes ``'open'``.

        :param connection: object exposing ``state``, ``credentials`` and
            ``handle`` attributes
        :return: the same ``connection`` object
        :raises dbt.exceptions.FailedToConnectException: if connecting fails;
            ``handle`` is reset to ``None`` and ``state`` set to ``'fail'``
        """
        if connection.state == 'open':
            logger.debug('Connection is already open, skipping open.')
            return connection
        credentials = cls.get_credentials(connection.credentials)
        try:
            # A thread that was not attached yet also gets the system class
            # loader as its context class loader.
            if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()
                jpype.java.lang.Thread.currentThread().setContextClassLoader(jpype.java.lang.ClassLoader.getSystemClassLoader())

            # TODO(review): the driver jar path is hard-coded to one
            # developer's machine; it should come from configuration.
            C = jaydebeapi.connect('com.ibm.db2.jcc.DB2Driver',
                                    'jdbc:db2://' + credentials.host + ':' + str(credentials.port) + '/' + credentials.database,
                                    [credentials.username, credentials.password],
                                    'C:/Users/ilija/Downloads/db2jcc-db2jcc4.jar')
            connection.handle = C
            connection.state = 'open'

        except Exception as e:
            # The message previously said "postgres" although this opens a
            # DB2 connection.
            logger.debug("Got an error when attempting to open a DB2 "
                         "connection: '{}'"
                         .format(e))

            connection.handle = None
            connection.state = 'fail'

            raise dbt.exceptions.FailedToConnectException(str(e))

        return connection
예제 #19
0
    def __init__(self, extractor='DefaultExtractor', **kwargs):
        """Load a document from ``url`` or ``html`` and run an extractor on it.

        A truthy ``url`` keyword is fetched with ``urllib2.urlopen`` and
        decoded with the text following ``charset=`` in the ``content-type``
        header, or with a ``chardet``-detected encoding when that text is
        ``'text/html'``.  A truthy ``html`` keyword is used directly and
        decoded via ``chardet`` if it is not already ``unicode``.

        :param extractor: class name appended to
            ``de.l3s.boilerpipe.extractors.`` to locate the extractor
        :raises Exception: if neither ``url`` nor ``html`` is provided
        """
        if kwargs.get('url'):
            request = urllib2.urlopen(kwargs['url'])
            self.data = request.read()
            encoding = request.headers['content-type'].lower().split(
                'charset=')[-1]
            if encoding.lower() == 'text/html':
                encoding = chardet.detect(self.data)['encoding']
            self.data = unicode(self.data, encoding)
        elif kwargs.get('html'):
            self.data = kwargs['html']
            if not isinstance(self.data, unicode):
                self.data = unicode(self.data,
                                    chardet.detect(self.data)['encoding'])
        else:
            raise Exception('No text or url provided')

        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()

        # Take the lock before ``try`` so the ``finally`` clause can never
        # release a lock this thread failed to acquire.
        lock.acquire()
        try:
            self.extractor = jpype.JClass("de.l3s.boilerpipe.extractors." +
                                          extractor).INSTANCE
        finally:
            lock.release()

        reader = StringReader(self.data)
        self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
        self.extractor.process(self.source)
예제 #20
0
 def _start_jvm(cls, jvm_path, jvm_options, driver_path, log4j_conf):
     """Start the JVM if it is not running and prepare the calling thread.

     ``None`` arguments fall back to the default JVM path, the driver jar
     under ``cls._BASE_PATH`` and the log4j properties file under
     ``cls._BASE_PATH``.  A thread that was not yet attached is attached and
     given a ``URLClassLoader`` for the driver jar as its context class
     loader, parented on ``cls.class_loader``.
     """
     jvm_path = jpype.get_default_jvm_path() if jvm_path is None else jvm_path
     if driver_path is None:
         driver_path = os.path.join(cls._BASE_PATH, ATHENA_JAR)
     if log4j_conf is None:
         log4j_conf = os.path.join(cls._BASE_PATH, LOG4J_PROPERTIES)

     if not jpype.isJVMStarted():
         _logger.debug('JVM path: %s', jvm_path)
         args = ['-server']
         args.append('-Djava.class.path={0}'.format(driver_path))
         args.append('-Dlog4j.configuration=file:{0}'.format(log4j_conf))
         if jvm_options:
             args.extend(jvm_options)
         _logger.debug('JVM args: %s', args)

         # The extra keyword arguments are skipped for 0.6.x releases.
         start_options = {}
         if not jpype.__version__.startswith("0.6"):
             start_options = {'ignoreUnrecognized': True,
                              'convertStrings': True}
         jpype.startJVM(jvm_path, *args, **start_options)

         starting_thread = jpype.java.lang.Thread.currentThread()
         cls.class_loader = starting_thread.getContextClassLoader()

     if jpype.isThreadAttachedToJVM():
         return

     jpype.attachThreadToJVM()
     if not cls.class_loader:
         attached_thread = jpype.java.lang.Thread.currentThread()
         cls.class_loader = attached_thread.getContextClassLoader()

     driver_url = jpype.java.net.URL('jar:file:{0}!/'.format(driver_path))
     class_loader = jpype.java.net.URLClassLoader.newInstance(
         [driver_url], cls.class_loader)
     jpype.java.lang.Thread.currentThread().setContextClassLoader(class_loader)
예제 #21
0
파일: samarah.py 프로젝트: Alshak/jcl
    def run(self):
        """Run the hybrid classification and return the result as ARFF text.

        Every entry of ``self.agents_python`` is added as an agent (its
        parameters and data), the classification is executed, and the
        clustering result is written to a temporary ``.arff`` file by the
        Java ``ARFFWriter``.  The temporary directory is removed again once
        the file has been read.

        Returns:
            The contents of the written ARFF file.
        """
        import shutil

        if not jp.isThreadAttachedToJVM():
            jp.attachThreadToJVM()

        HybridClassification = jp.JClass('jcl.learning.methods.multistrategy.samarah.HybridClassification')
        self.hybrid_classification = HybridClassification()

        for agent_python in self.agents_python:
            self.hybrid_classification.addAgent(agent_python.get_parameters(), agent_python.get_data())

        self.hybrid_classification.classify()
        clusRes = self.hybrid_classification.getClusteringResult()

        temp_file_url = tempfile.mkdtemp()
        try:
            # Millisecond timestamp keeps the file name distinct per run.
            timenow = int(round(time.time() * 1000))
            output_file_url = os.path.join(temp_file_url, "result%s.arff" % str(timenow))

            ARFFWriter = jp.JClass('jcl.io.arff.ARFFWriter')
            writer = ARFFWriter(output_file_url, clusRes)
            writer.write()

            with open(output_file_url, 'r') as f:
                output_file = f.read()
        finally:
            # mkdtemp() never cleans up after itself; without this every
            # call would leave a directory behind.
            shutil.rmtree(temp_file_url, ignore_errors=True)

        return output_file
예제 #22
0
def _jdbc_connect_jpype(jclassname, jars, libs, *driver_args):
    """Connect to a database over JDBC using JPype.

    Starts the JVM when it is not running yet (class path: ``jars`` followed
    by ``_get_classpath()``; library path: ``libs``), attaches the calling
    thread, initializes the SQL type map and the byte-array helper on first
    use, loads ``jclassname`` to register the driver and finally returns
    ``DriverManager.getConnection(*driver_args)``.
    """
    import jpype
    global _java_array_byte

    jvm_running = jpype.isJVMStarted()
    if not jvm_running:
        startup_args = []

        class_path = list(jars) if jars else []
        class_path.extend(_get_classpath())
        if class_path:
            class_path_value = os.path.pathsep.join(class_path)
            startup_args.append('-Djava.class.path=%s' % class_path_value)

        if libs:
            # Where the JVM looks for shared libraries.
            library_path_value = os.path.pathsep.join(libs)
            startup_args.append('-Djava.library.path=%s' % library_path_value)

        default_jvm = jpype.getDefaultJVMPath()
        jpype.startJVM(default_jvm, *startup_args)

    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()

    if _jdbc_name_to_const is None:
        sql_types = jpype.java.sql.Types
        name_to_const = {}
        for class_field in sql_types.__javaclass__.getClassFields():
            name_to_const[class_field.getName()] = class_field.getStaticAttribute()
        _init_types(name_to_const)

    if _java_array_byte is None:
        # First call: publish the byte-array factory at module level.
        def _java_array_byte(data):
            return jpype.JArray(jpype.JByte, 1)(data)

    # The driver registers itself with DriverManager when its class loads.
    jpype.JClass(jclassname)
    connection = jpype.java.sql.DriverManager.getConnection(*driver_args)
    return connection
예제 #23
0
    def __init__(self,
                 jars=[],
                 jvm_started=False,
                 mark_time_ranges=False,
                 include_range=False):
        """Initializes SUTime.

        :param jars: iterable of jar entries; stored as a copy on ``self.jars``
        :param jvm_started: when false, the class path is built with
            ``_create_classpath`` and the JVM is started with ``_start_jvm``
        :param mark_time_ranges: forwarded to the Java ``SUTimeWrapper``
        :param include_range: forwarded to the Java ``SUTimeWrapper``
        """
        self.mark_time_ranges = mark_time_ranges
        self.include_range = include_range
        # Copy so instances never share (and mutate) the default list.
        self.jars = list(jars)
        self._is_loaded = False
        self._lock = threading.Lock()

        if not jvm_started:
            self._classpath = self._create_classpath()
            self._start_jvm()

        # make it thread-safe
        if threading.activeCount() > 1:
            # ``is not 1`` compared identity with an int literal, which is
            # also true for a ``True`` result; test truthiness instead.
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()

        # The context manager releases the lock only if it was acquired.
        with self._lock:
            SUTimeWrapper = jpype.JClass(
                'edu.stanford.nlp.python.SUTimeWrapper')
            self._sutime = SUTimeWrapper(self.mark_time_ranges,
                                         self.include_range)
            self._is_loaded = True
예제 #24
0
    def getExternalConnection(self):
        """Open a DB2 connection via jaydebeapi and return it.

        The JDBC URL is built from the module-level ``rechnername``,
        ``database`` and the numeric part of ``username``; ``username`` and
        ``password`` are passed as connection properties together with
        ``securityMechanism`` ``"3"``.  Any exception is printed and the
        method then returns ``None``.
        """
        try:
            import jpype

            # Workaround: a thread that is not attached yet is attached and
            # given the system class loader as its context class loader.
            if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()
                current_thread = jpype.java.lang.Thread.currentThread()
                system_loader = jpype.java.lang.ClassLoader.getSystemClassLoader()
                current_thread.setContextClassLoader(system_loader)

            url_template = ("jdbc:db2://"
                            "{rechnername}.is.inf.uni-due.de:50{gruppennummer}/{database}")
            jdbc_url = url_template.format(
                rechnername=rechnername,
                gruppennummer=re.match(r"([a-z]+)([0-9]+)", username, re.I).groups()[1],
                database=database,
            )
            connection_properties = {
                'user': username,
                'password': password,
                'securityMechanism': "3"
            }
            driver_jar = os.path.join(os.getcwd(), 'jdbc-1.0.jar')

            return jaydebeapi.connect("com.ibm.db2.jcc.DB2Driver",
                                      jdbc_url,
                                      connection_properties,
                                      driver_jar)
        except Exception as e:
            print(e)
예제 #25
0
    def __init__(self, extractor='DefaultExtractor', **kwargs):
        """Load a document from ``url`` or ``html`` and run an extractor on it.

        A truthy ``url`` keyword is fetched with ``urllib2`` (sending
        ``self.headers``, 10 second timeout) and decoded with the text after
        ``charset=`` in the ``content-type`` header; when that text is
        ``'text/html'`` the encoding is detected with ``charade``, and
        ``'utf-8'`` is used if detection yields ``None``.  Undecodable bytes
        are ignored.  A truthy ``html`` keyword is used directly and decoded
        via ``charade`` if it is not already ``str``.

        :param extractor: class name appended to
            ``de.l3s.boilerpipe.extractors.`` to locate the extractor
        :raises Exception: if neither ``url`` nor ``html`` is provided
        """
        if kwargs.get('url'):
            request = urllib2.Request(kwargs['url'], headers=self.headers)
            connection = urllib2.urlopen(request, timeout=10)
            self.data = connection.read()
            encoding = connection.headers['content-type'].lower().split(
                'charset=')[-1]
            if encoding.lower() == 'text/html':
                encoding = charade.detect(self.data)['encoding']
            if encoding is None:
                encoding = 'utf-8'
            self.data = str(self.data, encoding, errors='ignore')
        elif kwargs.get('html'):
            self.data = kwargs['html']
            if not isinstance(self.data, str):
                self.data = str(self.data,
                                charade.detect(self.data)['encoding'])
        else:
            raise Exception('No text or url provided')

        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()

        # Acquire before ``try``; otherwise a failure while attaching would
        # make ``finally`` release a lock that is not held.
        lock.acquire()
        try:
            self.extractor = jpype.JClass("de.l3s.boilerpipe.extractors." +
                                          extractor).INSTANCE
        finally:
            lock.release()

        reader = StringReader(self.data)
        self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
        self.extractor.process(self.source)
예제 #26
0
def _jdbc_connect_jpype(jclassname, additional_classpath, *args):
    """Open a JDBC connection through JPype, starting the JVM if required.

    :param jclassname: JDBC driver class name; loading it with
        ``jpype.JClass`` registers the driver with ``DriverManager``
    :param additional_classpath: class path string for the JVM, or ``None``
        to start the JVM without a ``-Djava.class.path`` option
    :param args: arguments forwarded to ``DriverManager.getConnection``
    :return: the object returned by ``DriverManager.getConnection``
    """
    import jpype
    if not jpype.isJVMStarted():
        # Build the option list so that no ``None`` is handed to the JVM
        # when no additional class path was requested.
        jvm_args = []
        if additional_classpath is not None:
            classpath_option = '-Djava.class.path=' + additional_classpath
            print('starting jvm with ' + classpath_option)
            jvm_args.append(classpath_option)
        jpype.startJVM(jpype.getDefaultJVMPath(), *jvm_args)
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()
    if _converters is None:
        # Map java.sql.Types constant names to their values once.
        types = jpype.java.sql.Types
        types_map = {}
        for i in types.__javaclass__.getClassFields():
            types_map[i.getName()] = i.getStaticAttribute()
        _init_converters(types_map)
    global _java_array_byte
    if _java_array_byte is None:

        def _java_array_byte(data):
            return jpype.JArray(jpype.JByte, 1)(data)

    # register driver for DriverManager
    jpype.JClass(jclassname)
    return jpype.java.sql.DriverManager.getConnection(*args)
예제 #27
0
파일: dbapi2.py 프로젝트: ym8468/etl_py
def _jdbc_connect_jpype(jclassname, *args):
    """Return a JDBC connection created through JPype.

    The JVM is started with default settings if it is not running, the
    calling thread is attached, the type converters and the byte-array
    helper are initialized on first use, ``jclassname`` is loaded so the
    driver registers itself, and ``args`` are forwarded to
    ``DriverManager.getConnection``.
    """
    import jpype
    global _java_array_byte

    # Make sure a JVM is running.
    jvm_started = jpype.isJVMStarted()
    if not jvm_started:
        default_jvm = jpype.getDefaultJVMPath()
        jpype.startJVM(default_jvm)

    # Make sure the current thread is attached to it.
    thread_attached = jpype.isThreadAttachedToJVM()
    if not thread_attached:
        jpype.attachThreadToJVM()

    # Set up the type conversion mapping on first use.
    if _converters is None:
        sql_types = jpype.java.sql.Types
        constants_by_name = {}
        for class_field in sql_types.__javaclass__.getClassFields():
            constants_by_name[class_field.getName()] = class_field.getStaticAttribute()
        _init_converters(constants_by_name)

    if _java_array_byte is None:
        def _java_array_byte(data):
            return jpype.JArray(jpype.JByte, 1)(data)

    # Loading the class registers the driver with DriverManager.
    jpype.JClass(jclassname)
    driver_manager = jpype.java.sql.DriverManager
    return driver_manager.getConnection(*args)
예제 #28
0
def correlation_basedfeat_sel(bunch):
    """Correlation-based Feature Subset Selection, as implemented by the CfsSubsetEval class of Weka

    The dataset is converted to WEKA instances, filtered with a supervised
    ``AttributeSelection`` filter that uses ``CfsSubsetEval`` as evaluator
    and ``BestFirst`` as search, and converted back.

    :param bunch: dataset
    :return: new dataset
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    weka_data = utilities.convertBunchToWekaInstances(bunch)

    filter_class = jp.JClass('weka.filters.Filter')
    selection_filter = jp.JClass(
        'weka.filters.supervised.attribute.AttributeSelection')()
    evaluator = jp.JClass('weka.attributeSelection.CfsSubsetEval')()
    search = jp.JClass('weka.attributeSelection.BestFirst')()

    selection_filter.setEvaluator(evaluator)
    selection_filter.setSearch(search)
    selection_filter.setInputFormat(weka_data)

    filtered = filter_class.useFilter(weka_data, selection_filter)
    return utilities.convertWekaInstancesToBunch(filtered)
예제 #29
0
    def __init__(self,
                 jvm_started=False,
                 parse_datetime=False,
                 minimum_heap_size='128m',
                 maximum_heap_size='2048m'):
        """Initializes Duckling.

        :param jvm_started: when false, the class path is built with
            ``_create_classpath`` and the JVM is started with ``_start_jvm``
        :param parse_datetime: stored on the instance
        :param minimum_heap_size: passed to ``_start_jvm``
        :param maximum_heap_size: passed to ``_start_jvm``
        """

        self.parse_datetime = parse_datetime
        self._is_loaded = False
        self._lock = threading.Lock()

        if not jvm_started:
            self._classpath = self._create_classpath()
            self._start_jvm(minimum_heap_size, maximum_heap_size)

        # make it thread-safe
        if threading.activeCount() > 1:
            # Truthiness test replaces ``is not 1``, an identity check
            # against an int literal that is true even for ``True``.
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()

        # ``with`` never releases a lock that was not acquired, unlike the
        # previous acquire-inside-try arrangement.
        with self._lock:
            self.clojure = jpype.JClass('clojure.java.api.Clojure')
            # require the duckling Clojure lib
            require = self.clojure.var("clojure.core", "require")
            require.invoke(self.clojure.read("duckling.core"))
예제 #30
0
def import_dataset_from_arff(arff, class_index=None):
    '''Imports Dataset From an ARFF Textual Format

    The text is written to a temporary ``.arff`` file and loaded through
    WEKA's ``ConverterUtils$DataSource``.

    :param arff: the data in ARFF textual format
    :param class_index: the index of the class attribute; ``None`` (with a
        printed warning) or ``-1`` selects the last attribute
    :return: a dataset (Bunch)
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    tmp = common.TemporaryFile(suffix='.arff')
    tmp.writeString(arff)

    source = jp.JClass('weka.core.converters.ConverterUtils$DataSource')(
        tmp.name)
    instances = source.getDataSet()

    if class_index is None:
        # Parenthesised single-argument print works on Python 2 and 3.
        print('Warning: class is set to the last attribute!')
    if class_index is None or class_index == -1:
        class_index = instances.numAttributes() - 1

    instances.setClassIndex(class_index)
    return convert_weka_instances_to_bunch(instances)
예제 #31
0
 def _start_jvm(cls, jvm_path, jvm_options, driver_path):
     """Start the JVM if it is not running and prepare the calling thread.

     ``jvm_path`` defaults to JPype's default JVM path and ``driver_path``
     to ``ATHENA_JAR`` in this module's directory.  A thread that was not
     attached yet is attached and given a ``URLClassLoader`` for the driver
     jar as context class loader, parented on ``cls.class_loader``.
     """
     jvm_path = jpype.get_default_jvm_path() if jvm_path is None else jvm_path
     if driver_path is None:
         module_dir = os.path.dirname(os.path.abspath(__file__))
         driver_path = os.path.join(module_dir, ATHENA_JAR)

     if not jpype.isJVMStarted():
         _logger.debug('JVM path: %s', jvm_path)
         args = ['-server']
         args.append('-Djava.class.path={0}'.format(driver_path))
         if jvm_options:
             args.extend(jvm_options)
         _logger.debug('JVM args: %s', args)
         jpype.startJVM(jvm_path, *args)
         starting_thread = jpype.java.lang.Thread.currentThread()
         cls.class_loader = starting_thread.getContextClassLoader()

     if jpype.isThreadAttachedToJVM():
         return

     jpype.attachThreadToJVM()
     if not cls.class_loader:
         attached_thread = jpype.java.lang.Thread.currentThread()
         cls.class_loader = attached_thread.getContextClassLoader()

     driver_url = jpype.java.net.URL('jar:file:{0}!/'.format(driver_path))
     class_loader = jpype.java.net.URLClassLoader.newInstance(
         [driver_url], cls.class_loader)
     jpype.java.lang.Thread.currentThread().setContextClassLoader(
         class_loader)
예제 #32
0
def correlation_basedfeat_sel(bunch):
    """Reduce a dataset with Weka's CfsSubsetEval evaluator and a BestFirst search.

    The evaluator and search are plugged into Weka's supervised
    ``AttributeSelection`` filter, which is then applied to the data.

    :param bunch: dataset
    :return: new dataset
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    instances = utilities.convertBunchToWekaInstances(bunch)

    evaluator = jp.JClass('weka.attributeSelection.CfsSubsetEval')()
    search = jp.JClass('weka.attributeSelection.BestFirst')()

    selector = jp.JClass(
        'weka.filters.supervised.attribute.AttributeSelection')()
    selector.setEvaluator(evaluator)
    selector.setSearch(search)
    selector.setInputFormat(instances)

    weka_filter = jp.JClass('weka.filters.Filter')
    reduced = weka_filter.useFilter(instances, selector)
    return utilities.convertWekaInstancesToBunch(reduced)
예제 #33
0
    def _get_connection(self, server, user, password, service, jdbc_driver,
                        tags):
        """Connect to Oracle via cx_Oracle, or via JDBC when the client is missing.

        ``cx_Oracle.clientversion()`` is used as the probe: a
        ``DatabaseError`` switches to the JDBC path (``jdb.connect``).
        A service check is emitted with ``AgentCheck.OK`` on success and
        ``AgentCheck.CRITICAL`` on any failure, after which the exception
        is logged and re-raised.

        :return: the open connection object
        """
        try:
            # Check if the instant client is available
            cx_Oracle.clientversion()
        except cx_Oracle.DatabaseError as client_error:
            # Fall back to JDBC
            use_oracle_client = False
            self.log.debug(
                'Oracle instant client unavailable, falling back to JDBC: {}'.
                format(client_error))
            connect_string = self.JDBC_CONNECT_STRING.format(server, service)
        else:
            use_oracle_client = True
            self.log.debug('Running cx_Oracle version {0}'.format(
                cx_Oracle.version))
            connect_string = self.CX_CONNECT_STRING.format(
                user, password, server, service)

        try:
            if not use_oracle_client:
                try:
                    needs_attach = (jpype.isJVMStarted()
                                    and not jpype.isThreadAttachedToJVM())
                    if needs_attach:
                        jpype.attachThreadToJVM()
                        system_loader = (
                            jpype.java.lang.ClassLoader.getSystemClassLoader())
                        jpype.java.lang.Thread.currentThread(
                        ).setContextClassLoader(system_loader)
                    con = jdb.connect(self.ORACLE_DRIVER_CLASS, connect_string,
                                      [user, password], jdbc_driver)
                except jpype.JException(jpype.java.lang.RuntimeException) as e:
                    driver_missing = "Class {} not found".format(
                        self.ORACLE_DRIVER_CLASS)
                    if driver_missing in str(e):
                        msg = """Cannot run the Oracle check until either the Oracle instant client or the JDBC Driver
                        is available.
                        For the Oracle instant client, see:
                        http://www.oracle.com/technetwork/database/features/instant-client/index.html
                        You will also need to ensure the `LD_LIBRARY_PATH` is also updated so the libs are reachable.

                        For the JDBC Driver, see:
                        http://www.oracle.com/technetwork/database/application-development/jdbc/downloads/index.html
                        You will also need to ensure the jar is either listed in your $CLASSPATH or in the yaml
                        configuration file of the check.
                        """
                        self.log.error(msg)
                    raise
            else:
                con = cx_Oracle.connect(connect_string)

            self.log.debug("Connected to Oracle DB")
            self.service_check(
                self.SERVICE_CHECK_NAME, AgentCheck.OK, tags=tags)
        except Exception as e:
            self.service_check(
                self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL, tags=tags)
            self.log.error(e)
            raise
        return con
예제 #34
0
    def __init__(self, jsrc_path):
        """
        Start (or attach to) the JVM and run the reeb_graph Java entry points once.

        :param jsrc_path: (str) Path that contains compiled reeb_graph java project
                                (https://github.com/dbespalov/reeb_graph)
        """
        self.jsrc_path = jsrc_path

        if not jpype.isJVMStarted():
            jpype.startJVM(classpath=[jsrc_path], convertStrings=True)
        elif not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()

        # These imports are activated by jpype after starting the JVM
        from java.lang import System
        from java.io import PrintStream, File
        # Disable java output. os.devnull is '/dev/null' on POSIX and 'nul'
        # on Windows, so no manual switch is needed.
        System.setOut(PrintStream(File(os.devnull)))

        self.erg = jpype.JClass('ExtractReebGraph')()
        self.crg = jpype.JClass('CompareReebGraph')()

        # Set defaults
        self.params = ['4000', '0.005', str(2**7), '0.5']

        # the reeb project tries to save a file in the working directory -> redirect to tmp briefly
        wd = Path.cwd()
        os.chdir('/tmp')
        try:
            self.erg.main(self.params[:3])
            self.crg.main(self.params)
            try:
                (Path.cwd() / 'log_{}_{}_{}_{}'.format(*self.params)).unlink()
            except FileNotFoundError:
                pass
        finally:
            # Always restore the caller's working directory, even when one of
            # the Java entry points raises.
            os.chdir(str(wd))
예제 #35
0
def setup_conn():
    """Open a JDBC connection to the Oracle database and return a cursor.

    The JVM is started when it is not running yet; otherwise the calling
    thread is attached if needed and given the system class loader. Every
    failure is printed and reported as ``None`` (best effort).

    :return: a cursor on the new connection, or ``None`` when anything fails
    """
    try:
        import jaydebeapi
        import jpype

        if not jpype.isJVMStarted():
            # Raw string: '\o' is not a valid escape sequence.
            jpype.startJVM(jpype.getDefaultJVMPath(),
                           r'-Djava.class.path=C:\ojdbc10.jar')
        elif not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()
            jpype.java.lang.Thread.currentThread().setContextClassLoader(
                jpype.java.lang.ClassLoader.getSystemClassLoader())
        # A started JVM with an already attached thread needs no setup; the
        # previous code tried to start the JVM a second time in that case
        # and therefore always failed.

        # NOTE(review): host, credentials and jar path are hard-coded here;
        # consider moving them to configuration.
        con = jaydebeapi.connect(
            "oracle.jdbc.driver.OracleDriver",
            "jdbc:oracle:thin:@wmsdbtst01.sager.com:1521:MV10TST",
            ["TSTMOVE", "TSTMOVE"])
        return con.cursor()

    except Exception as e:
        print(e)
        print('no connection')
        return None
예제 #36
0
    def __init__(self, extractor='DefaultExtractor', **kwargs):
        """Load a document and run a boilerpipe extractor on it.

        The first truthy keyword among ``url``, ``html`` and ``file`` selects
        the input; the content is converted to ``unicode`` before parsing.

        :param extractor: class name inside ``de.l3s.boilerpipe.extractors``
        :param kwargs: ``url`` (fetched with urllib2), ``html`` (markup
            string) or ``file`` (path to a local file)
        :raises Exception: when none of the three keywords is given
        """
        if kwargs.get('url'):
            request = urllib2.Request(kwargs['url'], headers=self.headers)
            connection = urllib2.urlopen(request)
            self.data = connection.read()
            # The charset from the Content-Type header was parsed here but
            # never used; decoding relies on detection only.
            detected = charade.detect(self.data)['encoding']
            try:
                self.data = unicode(self.data, detected)
            except UnicodeError:
                self.data = self.data.decode(detected, 'ignore')
        elif kwargs.get('html'):
            self.data = kwargs['html']
            if not isinstance(self.data, unicode):
                try:
                    # gbk is tried first, detection is only the fallback.
                    self.data = unicode(self.data, 'gbk')
                except UnicodeError:
                    encoding = charade.detect(self.data)['encoding']
                    print("charset is : %s" % encoding)
                    self.data = self.data.decode(encoding, 'ignore')
        # Extractor(extractor='ArticleExtractor', file='/tmp/a.html')
        elif kwargs.get('file'):
            # ``with`` closes the file even when reading fails.
            with open(kwargs['file'], 'r') as f:
                self.data = f.read()
            if not isinstance(self.data, unicode):
                detected = charade.detect(self.data)['encoding']
                try:
                    self.data = unicode(self.data, detected)
                except UnicodeError:
                    self.data = self.data.decode(detected, 'ignore')
        else:
            raise Exception('No text or url provided')

        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()
        # Acquire outside the ``try`` so ``finally`` never releases a lock
        # that was not taken (which would mask the original error).
        lock.acquire()
        try:
            self.extractor = jpype.JClass(
                "de.l3s.boilerpipe.extractors." + extractor).INSTANCE
        finally:
            lock.release()

        reader = StringReader(self.data)
        self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
        self.extractor.process(self.source)
예제 #37
0
def main(args, options):
    """Parse sentences with the Stanford parser and emit one evidence database each.

    :param args: iterable of JSON-encoded sentences (each is passed through
        ``json.loads`` before parsing)
    :param options: object with an ``outfile`` attribute; the result is
        written to that path, or printed when it is ``None``

    The databases are joined with ``'---\\n'``. ``grammar_path`` is not
    defined in this function and must exist at module level.
    """
    # Load the NL parsing MLN
    mln = MLN(mlnfile=os.path.join(prac.locations.pracmodules, 'nl_parsing',
                                   'mln', 'predicates.mln'),
              grammar='PRACGrammar',
              logic='FuzzyLogic')

    # Load the Java VM
    if not java.isJvmRunning():
        java.initJvm()
    if not jpype.isThreadAttachedToJVM():
        jpype.attachThreadToJVM()

    # Suppress the stderr outputs from the parser
    jpype.java.lang.System.setErr(jpype.java.io.PrintStream(os.devnull))

    # Initialize the parser
    stanford_parser = StanfordParser(grammar_path)
    # Raw string so that '\(' is a regex escape, not an invalid string escape.
    literal_re = re.compile(r'(!?)(.+)\((.+)\)$')

    dbs = []
    for sentence in args:
        atoms = []
        words = set()
        deps = stanford_parser.get_dependencies(json.loads(sentence), True)
        for d in map(str, deps):
            # replace : by _ in stanford predicates
            res = literal_re.match(d)
            if res:
                d = '{}{}({})'.format(res.group(1),
                                      res.group(2).replace(':', '_'),
                                      res.group(3))
            # Distinct name: the old code rebound the ``args`` parameter here.
            _, pred, pred_args = mln.logic.parse_literal(str(d))
            words.update(pred_args)
            atoms.append('{}({})\n'.format(pred, ', '.join(pred_args)))

        # NOTE(review): the old loop variable was called ``pos`` and clobbered
        # a ``pos`` list, so each atom was appended onto the parser's own tag
        # entry and the dict was written to while iterating (a RuntimeError
        # on Python 3 when keys are added). Neither write affected the
        # emitted text, so both were dropped. Confirm nothing relies on
        # ``get_pos()`` results being mutated.
        for tag in stanford_parser.get_pos().values():
            if tag[0] not in words:
                continue
            atoms.append('has_pos({},{})\n'.format(tag[0], tag[1]))
        dbs.append(''.join(atoms))

    result = '---\n'.join(dbs)
    if options.outfile is not None:
        with open(options.outfile, 'w+') as f:
            f.write(result)
    else:
        # Single-argument parenthesised form works on Python 2 and 3.
        print(result)
예제 #38
0
def deserialize_weka_object(objString):
    """Rebuild a Weka object from its base64-encoded serialized form.

    The decoded bytes are written to a temporary file, which Weka's
    ``SerializationHelper.read`` then loads by name.

    :param objString: base64 text as produced by ``b64encode``
    :return: whatever ``SerializationHelper.read`` returns for that file
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    scratch = TemporaryFile(flags='wb+')
    scratch.writeString(b64decode(objString))
    helper = jp.JClass('weka.core.SerializationHelper')
    return helper.read(scratch.name)
예제 #39
0
def serialize_weka_object(obj):
    """Serialize a Weka object to base64 text.

    Weka's ``SerializationHelper.write`` stores the object in a temporary
    file; the bytes read from that file's handle are base64-encoded.

    :param obj: object accepted by ``SerializationHelper.write``
    :return: base64 encoding of the serialized bytes
    """
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    scratch = TemporaryFile(flags='wb+')
    jp.JClass('weka.core.SerializationHelper').write(scratch.name, obj)
    payload = scratch.fp.read()
    return b64encode(payload)
예제 #40
0
    def print_classifier(self):
        """Return the textual description of the stored classifier.

        :return: result of ``toString()`` on the object deserialized from
            ``self.sclassifier``
        :raises Exception: when deserialization or ``toString()`` fails
        """
        if not jp.isThreadAttachedToJVM():
            jp.attachThreadToJVM()

        try:
            classifier = common.deserialize_weka_object(self.sclassifier)
            return classifier.toString()
        except Exception:
            # ``except Exception`` instead of a bare ``except`` so that
            # KeyboardInterrupt and SystemExit are not turned into this error.
            raise Exception("Only WEKA classifiers/models supported. Please provide a valid WEKA learner.")
예제 #41
0
def get_jdbc_connection(iotdbIp, iotdbUser, iotdbPassword):
    """Open a JDBC connection with the IoTDB driver.

    If a JVM is running and the calling thread is not attached to it, the
    thread is attached and given the system class loader first.

    :param iotdbIp: passed to ``JDBC.connect`` as the connection URL
        (presumably a full JDBC URL despite the name; verify against callers)
    :param iotdbUser: user name
    :param iotdbPassword: password
    :return: the connection returned by ``JDBC.connect``
    """
    must_attach = jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM()
    if must_attach:
        jpype.attachThreadToJVM()
        system_loader = jpype.java.lang.ClassLoader.getSystemClassLoader()
        jpype.java.lang.Thread.currentThread().setContextClassLoader(
            system_loader)

    credentials = [iotdbUser, iotdbPassword]
    return JDBC.connect(
        'org.apache.iotdb.jdbc.IoTDBDriver', iotdbIp, credentials,
        'iotdb-jdbc-0.11.3-SNAPSHOT-jar-with-dependencies.jar')
예제 #42
0
    def __init__(self, extractor='DefaultExtractor', **kwargs):
        """Load a document from ``url`` or ``html`` and run a boilerpipe extractor.

        :param extractor: class name inside ``de.l3s.boilerpipe.extractors``;
            ``KeepEverythingWithMinKWordsExtractor`` is instantiated with
            ``kMin`` (default 1), every other extractor uses its ``INSTANCE``
        :param kwargs: ``url`` (page to download) or ``html`` (markup),
            plus the optional ``kMin``
        :raises Exception: when neither ``url`` nor ``html`` is given
        """
        if kwargs.get('url'):
            import gzip
            import io

            request = Request(kwargs['url'], headers=self.headers)
            connection = urlopen(request)
            self.data = connection.read()
            encoding = connection.headers['content-type'].lower().split(
                'charset=')[-1]
            if encoding.lower() == 'text/html':
                encoding = chardet.detect(self.data)['encoding']
            try:
                # io.BytesIO exists on Python 2 and 3. The former
                # ``import StringIO`` raised ImportError on Python 3, which
                # was swallowed below, so gzip bodies were never inflated.
                self.data = gzip.GzipFile(
                    fileobj=io.BytesIO(self.data)).read()
            except Exception:
                # Deliberate best effort: the body is not gzip data, keep
                # it unchanged.
                pass
            try:
                self.data = unicode(self.data, encoding)
            except NameError:
                # Python 3: there is no ``unicode`` builtin.
                self.data = self.data.decode(encoding)
        elif kwargs.get('html'):
            self.data = kwargs['html']
            try:
                if not isinstance(self.data, unicode):
                    self.data = unicode(self.data,
                                        chardet.detect(self.data)['encoding'])
            except NameError:
                # Python 3: there is no ``unicode`` builtin.
                if not isinstance(self.data, str):
                    self.data = self.data.decode(
                        chardet.detect(self.data)['encoding'])
        else:
            raise Exception('No text or url provided')

        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()
        # Acquire outside the ``try`` so ``finally`` never releases a lock
        # that was not taken (which would mask the original error).
        lock.acquire()
        try:
            extractor_class = jpype.JClass(
                "de.l3s.boilerpipe.extractors." + extractor)
            if extractor == "KeepEverythingWithMinKWordsExtractor":
                kMin = kwargs.get("kMin", 1)  # set default to 1
                self.extractor = extractor_class(kMin)
            else:
                self.extractor = extractor_class.INSTANCE
        finally:
            lock.release()

        reader = StringReader(self.data)
        self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
        self.extractor.process(self.source)
    def __init__(self, extractor='DefaultExtractor', **kwargs):
        """Load a document from ``url`` or ``html`` and run a boilerpipe extractor.

        :param extractor: class name inside ``de.l3s.boilerpipe.extractors``
        :param kwargs: ``url`` (page to download) or ``html`` (markup)
        :raises Exception: when neither ``url`` nor ``html`` is given
        """
        if kwargs.get('url'):
            request = urllib2.Request(kwargs['url'], headers=self.headers)
            connection = urllib2.urlopen(request)
            self.data = connection.read()
            encoding = connection.headers['content-type'].lower().split('charset=')[-1]

            if encoding.lower() == 'text/html':
                # No charset in the Content-Type header: detect it.
                encoding = charade.detect(self.data)['encoding']

            # Decoding used to be nested under the branch above, so pages
            # that declared a charset were never decoded at all.
            try:
                self.data = unicode(self.data, encoding, errors='replace')
            except LookupError as e:
                # The header named a codec Python does not know. Report it
                # and fall back to detection; the debugger breakpoint that
                # used to sit here is gone.
                print(e)
                self.data = unicode(self.data,
                                    charade.detect(self.data)['encoding'],
                                    errors='replace')

        elif kwargs.get('html'):
            self.data = kwargs['html']

            if not isinstance(self.data, unicode):
                # A leftover ``ipdb.set_trace()`` used to halt every call
                # that reached this point.
                self.data = unicode(self.data,
                                    charade.detect(self.data)['encoding'],
                                    errors='replace')

        else:
            raise Exception('No text or url provided')

        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()
        # Acquire outside the ``try`` so ``finally`` never releases a lock
        # that was not taken (which would mask the original error).
        lock.acquire()
        try:
            self.extractor = jpype.JClass(
                "de.l3s.boilerpipe.extractors." + extractor).INSTANCE
        finally:
            lock.release()

        reader = StringReader(self.data)
        self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
        self.extractor.process(self.source)
예제 #44
0
def attach_thread_to_jvm() -> None:
    """Attach the calling thread to a running JVM and give it the system class loader.

    Does nothing when no JVM has been started or when the thread is already
    attached. Background:
    https://github.com/baztian/jaydebeapi/issues/14#issuecomment-261489331
    """
    import jpype

    if not jpype.isJVMStarted() or jpype.isThreadAttachedToJVM():
        return

    jpype.attachThreadToJVM()
    system_loader = jpype.java.lang.ClassLoader.getSystemClassLoader()
    jpype.java.lang.Thread.currentThread().setContextClassLoader(system_loader)
예제 #45
0
def apply_mapped_classifier_get_instances(weka_classifier, original_data,
                                          data):
    '''An advanced version of the Apply Classifier method.
    Addresses incompatible training and test data, and returns a dataset with predictions.

    The classifier is re-serialized together with the original training
    instances into a temporary file, which Weka's ``InputMappedClassifier``
    loads through ``setModelPath``. Predictions are stored in
    ``data["targetPredicted"]`` (``data`` is modified in place).

    :param weka_classifier: WekaClassifier object
    :param original_data: original training instances, bunch
    :param data: test instances, bunch
    :return: tuple ``(data, report)``: the dataset (Bunch) with predictions and a
        textual report from the InputMappedClassifier class
    :raises Exception: when the classifier cannot be deserialized or an
        instance cannot be classified
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    try:
        classifier = common.deserialize_weka_object(
            weka_classifier.sclassifier)
    except Exception:
        # ``except Exception`` instead of a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not turned into this error.
        raise Exception(
            "Only WEKA classifiers/models supported. Please provide a valid WEKA learner."
        )

    original_training_instances = ut.convert_bunch_to_weka_instances(
        original_data)
    instances = ut.convert_bunch_to_weka_instances(data)

    # serialize classifier with original instances to a file once again for the Mapped classifier
    tfile = common.TemporaryFile(flags='wb+')
    s = jp.JClass('weka.core.SerializationHelper')
    s.writeAll(tfile.name, [classifier, original_training_instances])

    # construct a MappedClassifier
    mapped_classifier = jp.JClass(
        'weka.classifiers.misc.InputMappedClassifier')()
    mapped_classifier.setIgnoreCaseForNames(True)
    mapped_classifier.setTrim(True)
    mapped_classifier.setModelPath(tfile.name)

    try:
        predictions = [
            int(mapped_classifier.classifyInstance(instance))
            for instance in instances
        ]
        data["targetPredicted"] = predictions
    except Exception:
        # Same reasoning as above: do not swallow interpreter-exit signals.
        raise Exception(
            "Classifier not built. Please use the Build Classifier widget first."
        )

    report = mapped_classifier.toString()
    # Keep only the part of the report from the mapping section onwards.
    if MAPPING_REPORT_START in report:
        report = report[report.index(MAPPING_REPORT_START):]

    return data, report
예제 #46
0
    def __init__(self, extractor='DefaultExtractor', **kwargs):
        """Load a document from ``url`` or ``html`` and run a boilerpipe extractor.

        :param extractor: class name inside ``de.l3s.boilerpipe.extractors``
        :param kwargs: ``url`` (page to download) or ``html`` (markup)
        :raises Exception: when neither keyword is given, or when the
            download yields no data
        """
        if kwargs.get('url'):
            # Correctly encode url: when ``re_rus`` matches, the parts
            # matched by ``re_http`` and ``re_slash`` are removed, the rest
            # is IDNA-encoded and the scheme is put back.
            url = unicode(kwargs['url'])
            if re_rus.search(url):
                url = re_http.sub("", url)
                url = re_slash.sub("", url)
                url = url.encode("idna")
                url = "http://" + url

            # Set header
            h = {'User-Agent': self.headers[0], 'Accept': '*/*'}

            # Download the page
            request = urllib2.Request(url, headers=h)
            connection = urllib2.urlopen(request)
            try:
                self.data = connection.read()
                encoding = connection.headers['content-type'].lower().split(
                    'charset=')[-1]
            finally:
                # Close in every case; previously the connection leaked
                # whenever reading or decoding raised.
                connection.close()

            # Decode the page contents in the correct encoding
            if self.data is None:
                raise Exception('Html data cannot be extracted.')
            if encoding.lower() == 'text/html':
                encoding = charade.detect(self.data)['encoding']
            encoding = re_enc_error.sub("", encoding)
            encoding = re_enc_error2.sub("", encoding)
            encoding = re_enc_win.sub("windows-1251", encoding)
            if re_enc_def.search(encoding):
                encoding = DEFAULT_ENCODING
            self.data = unicode(self.data, encoding, "ignore")

        elif kwargs.get('html'):
            self.data = kwargs['html']
            if not isinstance(self.data, unicode):
                self.data = unicode(self.data,
                                    charade.detect(self.data)['encoding'])
        else:
            raise Exception('No text or url provided')

        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()
        # Acquire outside the ``try`` so ``finally`` never releases a lock
        # that was not taken (which would mask the original error).
        lock.acquire()
        try:
            self.extractor = jpype.JClass("de.l3s.boilerpipe.extractors." +
                                          extractor).INSTANCE
        finally:
            lock.release()

        reader = StringReader(self.data)
        self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
        self.extractor.process(self.source)
예제 #47
0
def weka_local_jrip(input_dict):
    '''Create Weka's JRip learner (the RIPPER rule learner).

    :param input_dict: mapping whose ``'params'`` entry is run through
        ``common.parse_options`` and applied with ``setOptions``
    :return: ``{'JRip_learner': <serialized learner>}``
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    learner = jp.JClass('weka.classifiers.rules.JRip')()
    options = common.parse_options(input_dict['params'])
    learner.setOptions(options)
    return {'JRip_learner': common.serialize_weka_object(learner)}
예제 #48
0
def weka_local_k_star(input_dict):
    '''Create Weka's instance-based K* (KStar) learner.

    :param input_dict: mapping whose ``'params'`` entry is run through
        ``common.parse_options`` and applied with ``setOptions``
    :return: ``{'KStar_learner': <serialized learner>}``
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    kstar_class = jp.JClass('weka.classifiers.lazy.KStar')
    learner = kstar_class()
    learner.setOptions(common.parse_options(input_dict['params']))
    serialized = common.serialize_weka_object(learner)
    return {'KStar_learner': serialized}
예제 #49
0
def weka_local_zeror(input_dict):
    '''Create Weka's ZeroR classifier, which predicts the mean (numeric class) or the mode (nominal class).

    :param input_dict: mapping whose ``'params'`` entry is run through
        ``common.parse_options`` and applied with ``setOptions``
    :return: ``{'classifier': <serialized classifier>}``
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    zero_r = jp.JClass('weka.classifiers.rules.ZeroR')()
    parsed_options = common.parse_options(input_dict['params'])
    zero_r.setOptions(parsed_options)

    result = {}
    result['classifier'] = common.serialize_weka_object(zero_r)
    return result
예제 #50
0
def weka_local_naive_bayes(input_dict):
    '''Create Weka's Naive Bayes classifier, a simple probabilistic classifier based on Bayes' theorem.

    :param input_dict: mapping whose ``'params'`` entry is run through
        ``common.parse_options`` and applied with ``setOptions``
    :return: ``{'Naive_Bayes_learner': <serialized learner>}``
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    params = input_dict['params']
    bayes = jp.JClass('weka.classifiers.bayes.NaiveBayes')()
    bayes.setOptions(common.parse_options(params))
    return {'Naive_Bayes_learner': common.serialize_weka_object(bayes)}
예제 #51
0
def weka_local_random_forest(input_dict):
    '''Create Weka's Random Forest learner.

    :param input_dict: mapping whose ``'params'`` entry is run through
        ``common.parse_options`` and applied with ``setOptions``
    :return: ``{'RandomForest_learner': <serialized learner>}``
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    forest = jp.JClass('weka.classifiers.trees.RandomForest')()
    forest_options = common.parse_options(input_dict['params'])
    forest.setOptions(forest_options)
    serialized_forest = common.serialize_weka_object(forest)
    return {'RandomForest_learner': serialized_forest}
예제 #52
0
def weka_local_rep_tree(input_dict):
    '''Create Weka's REPTree, a fast decision tree learner.

    Builds a decision/regression tree using information gain/variance and
    prunes it using reduced-error pruning.

    :param input_dict: mapping whose ``'params'`` entry is run through
        ``common.parse_options`` and applied with ``setOptions``
    :return: ``{'REPTree_learner': <serialized learner>}``
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    rep_tree = jp.JClass('weka.classifiers.trees.REPTree')()
    rep_tree.setOptions(common.parse_options(input_dict['params']))
    return {'REPTree_learner': common.serialize_weka_object(rep_tree)}
예제 #53
0
def weka_local_multilayer_perceptron(input_dict):
    '''Create Weka's MultilayerPerceptron, a feedforward neural network trained with backpropagation.

    :param input_dict: mapping whose ``'params'`` entry is run through
        ``common.parse_options`` and applied with ``setOptions``
    :return: ``{'Multilayer_Perceptron_learner': <serialized learner>}``
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    perceptron_class = jp.JClass(
        'weka.classifiers.functions.MultilayerPerceptron')
    network = perceptron_class()
    network.setOptions(common.parse_options(input_dict['params']))
    serialized_network = common.serialize_weka_object(network)
    return {'Multilayer_Perceptron_learner': serialized_network}
예제 #54
0
def weka_local_smo(input_dict):
    '''Create Weka's SMO support vector classifier (Sequential Minimal Optimization).

    :param input_dict: mapping whose ``'params'`` entry is run through
        ``common.parse_options`` and applied with ``setOptions``
    :return: ``{'SMO_learner': <serialized learner>}``
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    svm = jp.JClass('weka.classifiers.functions.SMO')()
    svm_options = common.parse_options(input_dict['params'])
    svm.setOptions(svm_options)
    return {'SMO_learner': common.serialize_weka_object(svm)}
예제 #55
0
def weka_local_j48(input_dict):
    '''Create Weka's J48 decision tree learner.

    :param input_dict: mapping whose ``'params'`` entry is run through
        ``common.parse_options`` and applied with ``setOptions``
    :return: ``{'J48_learner': <serialized learner>}``
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    j48_class = jp.JClass('weka.classifiers.trees.J48')
    tree = j48_class()
    tree.setOptions(common.parse_options(input_dict['params']))
    return {'J48_learner': common.serialize_weka_object(tree)}
예제 #56
0
def weka_local_random_tree(input_dict):
    '''Create Weka's RandomTree learner.

    A tree that considers K randomly chosen attributes at each node and
    performs no pruning.

    :param input_dict: mapping whose ``'params'`` entry is run through
        ``common.parse_options`` and applied with ``setOptions``
    :return: ``{'RandomTree_learner': <serialized learner>}``
    '''
    if not jp.isThreadAttachedToJVM():
        jp.attachThreadToJVM()

    random_tree = jp.JClass('weka.classifiers.trees.RandomTree')()
    tree_options = common.parse_options(input_dict['params'])
    random_tree.setOptions(tree_options)

    serialized_tree = common.serialize_weka_object(random_tree)
    return {'RandomTree_learner': serialized_tree}
예제 #57
0
    def __getattribute__(self, name):
        """Prepare the JVM lazily before resolving any attribute.

        On every access: start the JVM through ``init_JVM`` if it is not
        running, attach the calling thread if needed, and call
        ``initialize`` while ``initialized`` is falsy. All lookups go through
        ``object.__getattribute__`` so this hook does not recurse into
        itself.
        """
        plain_lookup = object.__getattribute__

        if not jpype.isJVMStarted():
            start_jvm = plain_lookup(self, 'init_JVM')
            start_jvm()

        if not jpype.isThreadAttachedToJVM():
            jpype.attachThreadToJVM()

        if not plain_lookup(self, 'initialized'):
            run_initialize = plain_lookup(self, 'initialize')
            run_initialize()

        return plain_lookup(self, name)
    def __init__(self, extractor='DefaultExtractor', **kwargs):
        """Load a document from ``url`` or ``html`` and run a boilerpipe extractor.

        :param extractor: class name inside ``de.l3s.boilerpipe.extractors``
        :param kwargs: ``url`` (page to download) or ``html`` (markup)
        :raises Exception: when neither keyword is given, or when the
            download yields no data
        """
        # Indentation uses spaces only; the previous mix of tabs and spaces
        # is rejected by Python 3 and by ``python -tt``.
        if kwargs.get('url'):
            # Correctly encode url: when ``re_rus`` matches, the parts
            # matched by ``re_http`` and ``re_slash`` are removed, the rest
            # is IDNA-encoded and the scheme is put back.
            url = unicode(kwargs['url'])
            if re_rus.search(url):
                url = re_http.sub("", url)
                url = re_slash.sub("", url)
                url = url.encode("idna")
                url = "http://" + url

            # Set header
            h = {'User-Agent': self.headers[0], 'Accept': '*/*'}

            # Download the page
            request = urllib2.Request(url, headers=h)
            connection = urllib2.urlopen(request)
            try:
                self.data = connection.read()
                encoding = connection.headers['content-type'].lower().split('charset=')[-1]
            finally:
                # Close in every case; previously the connection leaked
                # whenever reading or decoding raised.
                connection.close()

            # Decode the page contents in the correct encoding
            if self.data is None:
                raise Exception('Html data cannot be extracted.')
            if encoding.lower() == 'text/html':
                encoding = charade.detect(self.data)['encoding']
            encoding = re_enc_error.sub("", encoding)
            encoding = re_enc_error2.sub("", encoding)
            encoding = re_enc_win.sub("windows-1251", encoding)
            if re_enc_def.search(encoding):
                encoding = DEFAULT_ENCODING
            self.data = unicode(self.data, encoding, "ignore")

        elif kwargs.get('html'):
            self.data = kwargs['html']
            if not isinstance(self.data, unicode):
                self.data = unicode(self.data, charade.detect(self.data)['encoding'])
        else:
            raise Exception('No text or url provided')

        # make it thread-safe
        if threading.activeCount() > 1:
            if not jpype.isThreadAttachedToJVM():
                jpype.attachThreadToJVM()
        # Acquire outside the ``try`` so ``finally`` never releases a lock
        # that was not taken (which would mask the original error).
        lock.acquire()
        try:
            self.extractor = jpype.JClass(
                "de.l3s.boilerpipe.extractors." + extractor).INSTANCE
        finally:
            lock.release()

        reader = StringReader(self.data)
        self.source = BoilerpipeSAXInput(InputSource(reader)).getTextDocument()
        self.extractor.process(self.source)