def cfweka(input_dict, widget, name):
    """Detect noisy examples with a Weka learner via remote SOAP services.

    Runs k-fold cross-validation: for each fold, a Weka classifier is built
    remotely on the training split and applied to the held-out split; every
    test example whose predicted class differs from its actual class is
    recorded as noisy.

    input_dict keys used: 'timeout', 'learner' (a Weka learner/model handle),
    'data' (an Orange ExampleTable — assumed; TODO confirm against caller),
    'k_folds'.
    widget: progress is written to widget.progress (0-100) and persisted
    with widget.save() after each fold.
    name: forwarded to getWekaName() for the result label.

    Returns {'inds': sorted list of noisy example indices,
             'name': getWekaName(name)}.
    """
    from services.webservice import WebService
    # Two service endpoints: evaluation (build/apply classifiers) and
    # utilities (ARFF-string -> Weka instances conversion).
    wseval = WebService('http://vihar.ijs.si:8092/Evaluation?wsdl', float(input_dict['timeout']))
    wsutil = WebService('http://vihar.ijs.si:8092/Utilities?wsdl', float(input_dict['timeout']))
    somelearner = input_dict['learner']
    print somelearner
    data = input_dict['data']
    # arffstr = toARFFstring(data).getvalue()
    # #print arffstr
    # wekaInstances = wsutil.client.arff_to_weka_instances(arff = arffstr, class_index = odt.domain.index(odt.domain.classVar))
    # #print wekaInstances
    # model = wseval.client.build_classifier(learner = somelearner, instances = wekaInstances['instances'])
    # #return {}
    # addMetaID(data)
    k = int(input_dict['k_folds'])
    noisyIndices = []
    # Random fold assignment: selection[i] is the fold index of example i.
    selection = orange.MakeRandomIndicesCV(data, folds=k)
    count_noisy = [0] * k  # per-fold misclassification counts (diagnostic only)
    for test_fold in range(k):
        # Training split = everything NOT in this fold; serialize to ARFF
        # and convert remotely into Weka instances.
        train_arffstr = toARFFstring( data.select(selection, test_fold, negate=1)).getvalue()
        train_data = wsutil.client.arff_to_weka_instances( arff=train_arffstr, class_index=data.domain.index(data.domain.classVar))['instances']
        # Original indices of the examples in this fold, in selection order,
        # so remote predictions can be matched back to local examples.
        test_inds = [ i for i in range(len(selection)) if selection[i] == test_fold ]
        test_arffstr = toARFFstring(data.select(selection, test_fold)).getvalue()
        test_data = wsutil.client.arff_to_weka_instances( arff=test_arffstr, class_index=data.domain.index(data.domain.classVar))['instances']
        #print "\t\t", "Learned on", len(train_data), "examples"
        #file.flush()
        print "pred cl build"
        classifier = wseval.client.build_classifier( learner=somelearner, instances=train_data)['classifier']
        print "po cl build"
        eval_test_data = wseval.client.apply_classifier(classifier=classifier, instances=test_data)
        print "po eval"
        # Compare each remote prediction against the true class; mismatches
        # are treated as noise candidates.
        for i in range(len(eval_test_data)):
            #print "Test data length:", len(test_data), "Test inds length:", len(test_inds), "Eval Test data length:", len(eval_test_data)
            print i, "v for zanki", eval_test_data[i]['classes'], data[ test_inds[i]].getclass()
            # NOTE(review): comparison is done on the unicode rendering of the
            # class value — assumes the service returns class labels as text.
            if eval_test_data[i]['classes'] != unicode( data[test_inds[i]].getclass()):
                # selection_filter[int(example[meta_id].value)] = 0
                noisyIndices.append(test_inds[i])
                count_noisy[test_fold] += 1
        # END test_data
        # Report fold-level progress back to the UI.
        widget.progress = int((test_fold + 1) * 1.0 / k * 100)
        widget.save()
    # END test_fold
    return {'inds': sorted(noisyIndices), 'name': getWekaName(name)}
def call_webservice(input_dict): from services.webservice import WebService ws = WebService(input_dict['wsdl'], float(input_dict['timeout'])) selected_method = {} for method in ws.methods: if method['name'] == input_dict['wsdl_method']: selected_method = method function_to_call = getattr(ws.client, selected_method['name']) ws_dict = {} for i in selected_method['inputs']: try: ws_dict[i['name']] = input_dict[i['name']] if ws_dict[i['name']] is None: pass if i['type'] == bool: if input_dict[i['name']] == "true": ws_dict[i['name']] = 1 else: ws_dict[i['name']] = 0 if ws_dict[i['name']] == '': if input_dict['sendemptystrings'] == "true": ws_dict[i['name']] = '' else: ws_dict.pop(i['name']) except Exception as e: print e ws_dict[i['name']] = '' results = function_to_call(**ws_dict) output_dict = results return output_dict
def nlp_def_extraction_terms(input_dict):
    """Definition extraction using terms.

    Sends ToTrTaLe annotations plus term candidates to the remote glossary
    extraction service and returns its candidate sentences.
    """
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8099')
    service = WebService(endpoint, 60000)
    reply = service.client.GlossaryExtractionByTerms(
        corpus=input_dict['annotations'],
        candidates=input_dict['term_candidates'],
        lang=input_dict['lang'],
        nominatives=input_dict['nominatives'],
        termsPerSent=input_dict['terms_per_sentence'],
        select=input_dict['threshold'],
        verb_two_terms=input_dict['verb_two_terms'],
        multiword_term=input_dict['multiword_term'],
        num_multiterms=input_dict['num_multiterms'],
        term_beginning=input_dict['term_beginning'])
    return {'sentences': reply['candidates']}
def ilp_sdmaleph(input_dict):
    """Run the remote SDM-Aleph ILP service on the given examples.

    input_dict['examples'] may be an orange.ExampleTable (serialized to .tab),
    a list (serialized to JSON), or a raw string; anything else raises.
    Optional parameters posted as '' by the form are translated to None so
    the service applies its own defaults.

    Returns {'theory': <induced theory string>}.
    """
    import orange

    def _opt(key):
        # Web forms post '' for unset optional fields; the service needs None.
        val = input_dict.get(key)
        return val if val != '' else None

    ws = WebService('http://vihar.ijs.si:8097', 3600)
    data = input_dict.get('examples')
    if isinstance(data, orange.ExampleTable):
        # Round-trip through a temp file: Orange writes by path, we read back
        # the serialized .tab contents through the same handle.
        with tempfile.NamedTemporaryFile(suffix='.tab', delete=True) as f:
            data.save(f.name)
            examples = f.read()
    elif isinstance(data, list):
        examples = json.dumps(data)
    elif isinstance(data, str):
        examples = data
    else:
        raise Exception('Illegal examples format. '
                        'Supported formats: str, list or Orange')
    response = ws.client.sdmaleph(
        examples=examples,
        mapping=input_dict.get('mapping'),
        ontologies=[{'ontology': ontology} for ontology in input_dict.get('ontology')],
        relations=[{'relation': relation} for relation in input_dict.get('relation')],
        posClassVal=_opt('posClassVal'),
        cutoff=_opt('cutoff'),
        minPos=_opt('minPos'),
        noise=_opt('noise'),
        clauseLen=_opt('clauseLen'),
        dataFormat=_opt('dataFormat'))
    return {'theory': response['theory']}
def load_corpus(input_dict):
    """Parse an input file remotely; file content is sent base64-encoded."""
    path = input_dict['file']
    handle = safeOpen(path)
    base_name = os.path.basename(path)
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl')
    payload = base64.b64encode(handle.read())
    service = WebService(endpoint, 60000)
    reply = service.client.parseFile(fileName=base_name, inFile=payload)
    return {'corpus': reply['parsedFile']}
def nlp_def_extraction_wnet(input_dict):
    """Definition extraction using WordNet via the remote glossary service."""
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8099')
    reply = WebService(endpoint, 60000).client.GlossaryExtractionByWnet(
        corpus=input_dict['annotations'],
        lang=input_dict['lang'])
    return {'sentences': reply['candidates']}
def get_cross_validation_accuracy(arff):
    """Average 5-fold CV accuracy of J48 over 10 repetitions.

    arff: dataset as an ARFF string.
    Returns the mean accuracy (float) across the 10 cross-validation runs.
    """
    # The service proxies, the J48 learner and the ARFF->instances conversion
    # are identical on every repetition, so build them once instead of
    # re-creating them inside the loop (the original did 10x the SOAP setup).
    j48 = WebService("http://vihar.ijs.si:8092/Classification?wsdl")
    arff2weka = WebService("http://vihar.ijs.si:8092/Utilities?wsdl")
    cv = WebService("http://vihar.ijs.si:8092/Evaluation?wsdl", timeout=600)
    j48_learner = j48.client.J48(params="")['J48_learner']
    instances = arff2weka.client.arff_to_weka_instances(arff=arff, class_index="")['instances']
    acs = []
    for a in range(10):
        # Each cross_validate call re-shuffles folds remotely, so only this
        # step varies between repetitions.
        cv_response = cv.client.cross_validate(learner=j48_learner, instances=instances, folds=5)
        acs.append(float(cv_response['accuracy']))
    return sum(acs) * 1. / len(acs)
def nlp_term_extraction(input_dict):
    """Term extraction from totrtale annotations."""
    service_url = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl')
    service = WebService(service_url, 60000)
    reply = service.client.TermExtraction(
        corpus=input_dict['annotations'],
        lang=input_dict['lang'],
        threshold=0)
    return {'candidates': reply['candidates']}
def nlp_def_extraction_patterns(input_dict):
    """Definition extraction using pre-defined patterns."""
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8099')
    service = WebService(endpoint, 60000)
    reply = service.client.GlossaryExtractionByPatterns(
        corpus=input_dict['annotations'],
        lang=input_dict['lang'],
        pattern=input_dict['pattern'])
    return {'sentences': reply['candidates']}
def nlp_term_extraction(input_dict):
    """Term extraction from totrtale annotations.

    TEI-namespaced XML input is converted with XMLtoTEI before being sent.
    """
    corpus = input_dict['annotations']
    language = input_dict['lang']
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl')
    if '<TEI xmlns="http://www.tei-c.org/ns/1.0">' in corpus:
        corpus = XMLtoTEI(corpus)
    service = WebService(endpoint, 60000)
    reply = service.client.TermExtraction(corpus=corpus, lang=language, threshold=0)
    return {'candidates': reply['candidates']}
def call_webservice(input_dict): from services.webservice import WebService ws = WebService(input_dict['wsdl'], float(input_dict['timeout'])) selected_method = {} for method in ws.methods: if method['name'] == input_dict['wsdl_method']: selected_method = method function_to_call = getattr(ws.client, selected_method['name']) ws_dict = {} for i in selected_method['inputs']: try: ws_dict[i['name']] = input_dict[i['name']] if ws_dict[i['name']] is None: pass if i['type'] == bool: if input_dict[i['name']] == "true": ws_dict[i['name']] = 1 else: ws_dict[i['name']] = 0 if ws_dict[i['name']] == '': if input_dict['sendemptystrings'] == "true": ws_dict[i['name']] = '' else: ws_dict.pop(i['name']) except Exception as e: print e ws_dict[i['name']] = '' results = function_to_call(**ws_dict) output_dict = results if type(results) == dict: return output_dict elif type(results) == list: output_dict = {} for l in results: if type(l) == dict: for k in l.keys(): a = output_dict.get(k, []) a.append(l[k]) output_dict[k] = a return output_dict return results
def cfdecide(input_dict, widget): from pysimplesoap.client import SoapFault somelearner = input_dict['learner'] print somelearner # SWITCH TO PROCESSING WITH WEKA CLASSIFIERS if type(somelearner) == unicode: from services.webservice import WebService wsutil = WebService('http://vihar.ijs.si:8092/Utilities?wsdl', float(input_dict['timeout'])) name = "" try: name = wsutil.client.print_model( model=somelearner)['model_as_string'] print wsutil.client.print_model(model=somelearner), name except SoapFault: # TODO something print "Soap fault: unicode string is not a Weka classification learner/model." return {} return cfweka(input_dict, widget, name) else: return cforange(input_dict, widget)
def nlp_totrtale(input_dict): ''' Calls the totrtale web service. ''' corpus = input_dict['corpus'] lang = input_dict['lang'] wsdl = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl') xml = input_dict['xml'] == 'true' postprocess = input_dict['postprocess'] == 'true' bohoricica = input_dict['bohoricica'] == 'true' antique = input_dict['antique'] == 'true' ws = WebService(wsdl, 60000) response = ws.client.runTotale(inFile=corpus, language=lang, postProcessing=postprocess, bohoricica=bohoricica, antiqueSlovenian=antique, outputAsXML=xml) errors = response['error'] if errors: print errors return {'annotations': response['annotatedFile']}
def import_webservice(self, request):
    """Import a WSDL-described web service as a new widget category.

    Introspects the service at request.data['wsdl'] and materializes it as
    database rows: one Category (under a shared 'WSDL Imports' parent), one
    AbstractWidget per service method wired to the 'call_webservice' action,
    plus AbstractInput/AbstractOutput rows for each method's parameters.

    Returns a JSON HttpResponse {'category_id': <new category id>}.
    NOTE(review): looks like a Django view/viewset method — confirm; no
    validation is performed on the incoming 'wsdl' value.
    """
    wsdl = request.data.get('wsdl')
    ws = WebService(wsdl)
    wsdl_category, _ = Category.objects.get_or_create(name='WSDL Imports')
    new_c = Category()
    current_name = ws.name
    i = 0
    # De-duplicate the category name per user: "Name", "Name (1)", "Name (2)", ...
    while request.user.categories.filter(name=current_name).count() > 0:
        i = i + 1
        current_name = ws.name + ' (' + str(i) + ')'
    new_c.name = current_name
    new_c.user = request.user
    new_c.workflow = request.user.userprofile.active_workflow
    new_c.parent = wsdl_category
    new_c.save()
    # One widget per service method, all dispatched through call_webservice.
    for m in ws.methods:
        new_a = AbstractWidget()
        new_a.name = m['name']
        new_a.action = 'call_webservice'
        new_a.wsdl = ws.wsdl_url
        new_a.wsdl_method = m['name']
        new_a.description = m['documentation']
        new_a.user = request.user
        new_a.category = new_c
        new_a.save()
        # Fixed parameter: per-call timeout (seconds as text, default 60).
        new_i = AbstractInput()
        new_i.parameter = True
        new_i.widget = new_a
        new_i.name = "Timeout"
        new_i.short_name = "to"
        new_i.variable = "timeout"
        new_i.default = '60'
        new_i.parameter_type = 'text'
        new_i.save()
        # Fixed parameter: whether empty-string inputs are forwarded.
        new_i = AbstractInput()
        new_i.parameter = True
        new_i.widget = new_a
        new_i.name = "Send empty strings to webservices"
        new_i.short_name = "ses"
        new_i.variable = "sendemptystrings"
        new_i.default = ''
        new_i.parameter_type = 'checkbox'
        new_i.save()
        # One input row per declared method argument; booleans become
        # checkboxes, everything else a textarea.
        # NOTE(review): this loop reuses the outer counter variable `i`,
        # which is harmless here but fragile if code is added after it.
        for i in m['inputs']:
            new_i = AbstractInput()
            new_i.name = i['name']
            new_i.variable = i['name']
            new_i.short_name = i['name'][:3]
            new_i.description = ''
            new_i.required = False
            new_i.parameter = False
            if i['type'] == bool:
                new_i.parameter_type = 'checkbox'
            else:
                new_i.parameter_type = 'textarea'
            new_i.default = ''
            new_i.widget = new_a
            new_i.save()
        # One output row per declared method result.
        for o in m['outputs']:
            new_o = AbstractOutput()
            new_o.name = o['name']
            new_o.variable = o['name']
            new_o.short_name = o['name'][:3]
            new_o.description = ''
            new_o.widget = new_a
            new_o.save()
    data = json.dumps({'category_id': new_c.id})
    return HttpResponse(data, 'application/json')