def api_learn_add_del_route(op):
    if op not in allowed_endpoints:
        abort(404)
    session['username'] = database.get_username(
        request.form['interface'], request.form['username'])
    if session['username'] is None:
        abort(403)
    session['logged_in'] = True
    print '@@@@@@@@', session['username']
    options = {}
    if op != 'query':
        options = generic_learn_route(
            op, request.form,
            request.files['file'] if 'file' in request.files else None)
    else:
        try:
            # Retrieve knowledge.
            if 'type' in request.form and request.form['type'] == 'text':
                options['text'] = database.get_text(session['username'])
            elif 'type' in request.form and request.form['type'] == 'image':
                options['pictures'] = database.get_images(session['username'])
            else:
                options['pictures'] = database.get_images(session['username'])
                options['text'] = database.get_text(session['username'])
        except Exception as e:
            log(e)
            options['errno'] = 500
            options['error'] = str(e)
    if 'errno' in options:
        return json.dumps(options), options['errno']
    return json.dumps(options), 200
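
# Response contract implied by the route above (field values are
# hypothetical): a successful 'query' returns HTTP 200 with a JSON body
# such as
#   {"text": [...], "pictures": [...]}
# while a failure returns the stored errno as the status code, e.g.
#   {"errno": 500, "error": "..."}
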
def prepare_delta_E_sums(shifted_energies, temperatures, _accuracy, options):
    """
    Prepares the sum of the second part of the Z_c_m equation.
    """

    energies_cnt = len(shifted_energies)
    temp_cnt = len(temperatures)

    delta_E_sums = np.empty([energies_cnt, temp_cnt], dtype=_accuracy)

    log(__name__, "Preparing delta_E_sums to speed up the calculations",
        options.verbose, indent=3)

    for t_i in range(temp_cnt):
        temperature = temperatures[t_i]
        kT = _accuracy(Constants.kB * temperature)

        for e_i in range(energies_cnt):
            case_energies = shifted_energies[e_i]
            cnt_case_energies = len(case_energies)

            sum_Ediff = _accuracy(0.0)

            # Summing in reverse order so that the small terms are
            # accumulated first, reducing floating-point error.
            for i in reversed(range(cnt_case_energies)):
                sum_Ediff += _accuracy(math.exp(-1.0 * case_energies[i] / kT))

            delta_E_sums[e_i][t_i] = sum_Ediff

    return delta_E_sums
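
# A minimal, self-contained sketch (toy values; kB in eV/K is assumed here)
# of what each delta_E_sums[e][t] entry holds: the Boltzmann sum over the
# shifted energies of case e at temperature t.
import numpy as np

kB = 8.617333262e-5                        # Boltzmann constant (assumed eV/K)
shifted = np.array([0.0, 0.1, 0.2], dtype=np.longdouble)
kT = np.longdouble(kB * 300.0)

# Vectorised equivalent of the inner loop above (same sum, forward order):
print np.exp(-shifted / kT).sum()          # == delta_E_sums[e][t]
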
def prepare_energies(input_data_array, _accuracy, options):
    """
    Prepares energy arrays for the calculations.
    """

    log(__name__, "Preparing the energies", options.verbose, indent=3)

    cases_cnt = input_data_array.shape[0]

    min_energies = np.empty(cases_cnt, dtype=_accuracy)
    energies = []
    shifted_energies = []
    experiment_cnts = np.empty(cases_cnt, dtype=_accuracy)

    for i in range(cases_cnt):
        # Removes all zero values from the beginning and end of the array.
        case_energies = np.trim_zeros(copy.deepcopy(input_data_array[i]))
        energies.append(case_energies)

        # The minimum might be the first value, but it might not be,
        # so we take the minimum explicitly.
        min_energies[i] = _accuracy(np.min(case_energies))

        diff_energy = case_energies - min_energies[i]
        shifted_energies.append(diff_energy)

        # Saving the number of experiments of the stoichiometry.
        experiment_cnts[i] = len(case_energies)

    return energies, min_energies, shifted_energies, experiment_cnts
def prepare_parameters(options):
    """
    Prepares analysis parameters: temperature and mu.
    """

    success = True
    error = ""
    temperatures = None
    chem_pot_range = None

    # Reading in the temperatures.
    success, error, temperatures = Utilities.get_list_of_temps(options.temps)
    if not success:
        # Return the full four-tuple so callers can always unpack it.
        return success, error, temperatures, chem_pot_range

    # Reading in the chemical potential.
    if options.urange is not None:
        success, error, chem_pot_range = Utilities.get_chem_pot_range(options.urange)
        if not success:
            return success, error, temperatures, chem_pot_range

    log(__name__, "Analysis will be performed at (K): %s" % (options.temps),
        options.verbose, indent=2)

    if chem_pot_range is not None:
        log(__name__, "mu parameter: %s" % (options.urange),
            options.verbose, indent=2)

    return success, error, temperatures, chem_pot_range
def get_images(self, username):
    log('Retrieving all images from images_' + username)
    # Notice image['data'] was encoded using Base64. The MongoDB _id is
    # excluded so the documents are JSON-serializable.
    return [image for image in
            self.get_image_collection(username).find({}, {'_id': 0})]
def infer(self, LUCID, service_graph, text_data, image_data):
    # Create the list of QueryInput.
    query_input_list = []
    for node in service_graph.node_list:
        service = self.SERVICES[node.service_name]
        data = text_data if service.input_type == 'text' else image_data
        host, port = service.get_host_port()
        tag_list = [host, str(port), str(len(node.to_indices))]
        for to_index in node.to_indices:
            tag_list.append(str(to_index))
        query_input_list.append(self.create_query_input(
            service.input_type, data, tag_list))
    query_spec = self.create_query_spec('query', query_input_list)
    # Go through all starting indices and send requests.
    result = []
    for start_index in service_graph.starting_indices:
        service = self.SERVICES[service_graph.get_node(
            start_index).service_name]
        client, transport = self.get_client_transport(service)
        log('Sending infer request to ' + service.name)
        request = Request()
        request.LUCID = str(LUCID)
        request.spec = query_spec
        # Pass the request instance, not the Request class.
        result.append(client.infer(request).msg)
        transport.close()
    return ' '.join(result)
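
# Sketch of the tag_list format assembled above for each QueryInput (shape
# inferred from the loop; the address and indices are hypothetical):
#
#   [host, port, number_of_downstream_nodes, to_index_1, ..., to_index_N]
#
# e.g. a node reachable at 192.168.1.2:8083 with downstream nodes 1 and 2
# produces ['192.168.1.2', '8083', '2', '1', '2'].
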
def send_query(self, LUCID, service_name, query_input_list):
    query_spec = self.create_query_spec('query', query_input_list)
    service = self.SERVICES[service_name]
    client, transport = self.get_client_transport(service)
    log('Sending infer request to ' + service.name)
    result = client.infer(str(LUCID), query_spec)
    transport.close()
    return result
def __init__(self):
    memcached_addr = os.environ.get('MEMCACHED_PORT_11211_TCP_ADDR')
    if memcached_addr:
        log('Memcached: ' + memcached_addr)
        self.client = Client([(memcached_addr, 11211)])
    else:
        log('Memcached: localhost')
        self.client = Client([('127.0.0.1', 11211)])
def __init__(self):
    mongodb_addr = os.environ.get('MONGO_PORT_27017_TCP_ADDR')
    if mongodb_addr:
        log('MongoDB: ' + mongodb_addr)
        self.db = MongoClient(mongodb_addr, 27017).lucida
    else:
        log('MongoDB: localhost')
        self.db = MongoClient().lucida
    self.users = self.db.users
def learn_image(self, LUCID, image_type, image_data, label):
    for service in Config.Service.LEARNERS['image']:  # add concurrency?
        knowledge_input = self.create_query_input(
            image_type, image_data, [label])
        client, transport = self.get_client_transport(service)
        log('Sending learn_image request to IMM')
        client.learn(str(LUCID),
                     self.create_query_spec('knowledge', [knowledge_input]))
        transport.close()
def learn_text(self, LUCID, text_type, text_data, text_id):
    for service in Config.Service.LEARNERS['text']:  # add concurrency?
        knowledge_input = self.create_query_input(
            text_type, text_data, [text_id])
        client, transport = self.get_client_transport(service)
        log('Sending learn_text request to QA')
        client.learn(str(LUCID),
                     self.create_query_spec('knowledge', [knowledge_input]))
        transport.close()
def infer_route():
    options = {}
    if os.environ.get('ASR_ADDR_PORT'):
        options['asr_addr_port'] = os.environ.get('ASR_ADDR_PORT')
    else:
        options['asr_addr_port'] = 'ws://localhost:' + port_dic["cmd_port"]
    try:
        # Deal with POST requests.
        if request.method == 'POST':
            form = request.form
            upload_file = request.files['file'] if 'file' in request.files \
                else None
            if upload_file is not None and upload_file.filename != '':
                check_image_extension(upload_file)
            speech_input = form['speech_input'] if 'speech_input' in form \
                else ''
            print '@@@@@@@@@@', speech_input
            image_input = [upload_file.read()] if upload_file else None
            lucida_id = session['username']
            # Check if context is saved for the Lucida user.
            # If not, classify the query; otherwise restore the session.
            if lucida_id not in Config.SESSION:
                services_needed = query_classifier.predict(
                    speech_input, upload_file)
                speech_input = [speech_input]
            else:
                services_needed = Config.SESSION[lucida_id]['graph']
                Config.SESSION[lucida_id]['data']['text'].append(speech_input)
                speech_input = Config.SESSION[lucida_id]['data']['text']
            options['result'] = thrift_client.infer(
                lucida_id, services_needed, speech_input, image_input)
            log('Result ' + options['result'])
            # Check if the Calendar service is needed.
            # If so, JavaScript needs to receive the parsed dates.
            if services_needed.has_service('CA'):
                options['dates'] = options['result']
                options['result'] = None
            return render_template('infer.html', **options)
        else:
            raise RuntimeError('Did you click the Ask button?')
    except Exception as e:
        log(e)
        if str(e) == 'TSocket read 0 bytes':
            e = 'Back-end service encountered a problem'
        options['error'] = e
        return render_template('infer.html', **options)
    # Display.
    return render_template('infer.html', **options)
def get_host_port(self):
    try:
        host = 'localhost'
        # TODO - Need to replace with a dynamic service finding system -
        # good candidate is to finish the full CREATE system
        tcp_addr = os.environ.get(
            self.name + '_PORT_' + str(self.port) + '_TCP_ADDR')
        if tcp_addr:
            log('TCP address is resolved to ' + tcp_addr)
            host = tcp_addr
        return host, self.port
    except Exception:
        raise RuntimeError('Cannot access service ' + self.name)
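
# The <NAME>_PORT_<port>_TCP_ADDR variables read above follow the Docker
# container-link naming convention. A runnable sketch (service name and
# address are hypothetical):
import os

os.environ['IMM_PORT_8082_TCP_ADDR'] = '172.17.0.3'  # what a Docker link sets

name, port = 'IMM', 8082
tcp_addr = os.environ.get(name + '_PORT_' + str(port) + '_TCP_ADDR')
print (tcp_addr or 'localhost'), port                # -> 172.17.0.3 8082
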
def generic_infer_route(form, upload_file):
    options = {}
    if os.environ.get('ASR_ADDR_PORT'):
        options['asr_addr_port'] = os.environ.get('ASR_ADDR_PORT')
    else:
        options['asr_addr_port'] = 'ws://localhost:' + port_dic["cmd_port"]
    try:
        # Deal with POST requests.
        if request.method == 'POST':
            if upload_file is not None and upload_file.filename != '':
                check_image_extension(upload_file)
            # Classify the query.
            speech_input = form['speech_input'] if 'speech_input' in form \
                else ''
            print 'Query: ', speech_input
            image_input = [upload_file.read()] if upload_file else None
            lucida_id = session['username']
            # Check if context is saved for the Lucida user.
            # If not, classify the query; otherwise restore the session.
            if lucida_id not in Config.SESSION:
                services_needed = query_classifier.predict(
                    speech_input, upload_file)
                speech_input = [speech_input]
            else:
                services_needed = Config.SESSION[lucida_id]['graph']
                Config.SESSION[lucida_id]['data']['text'].append(speech_input)
                speech_input = Config.SESSION[lucida_id]['data']['text']
            node = services_needed.get_node(0)
            try:
                options['result'] = thrift_client.infer(
                    lucida_id, node.service_name, speech_input, image_input)
            except Exception as ex:
                print 'Exception raised while trying to infer', ex.message
                options['error'] = str(ex)
                raise
            if 'result' in options:
                log('Result ' + options['result'])
                # Check if the Calendar service is needed.
                # If so, JavaScript needs to receive the parsed dates.
                if services_needed.has_service('CAWF'):
                    options['dates'] = options['result']
                    options['result'] = None
            else:
                options['error'] = "Result was empty."
    except Exception as e:
        log(e)
        options['errno'] = "Unknown"
        options['error'] = str(e.message)
        # hasattr, not `'code' in e`: membership tests raise TypeError on
        # exception objects.
        if hasattr(e, 'code') and re.match(r"^4\d\d$", str(e.code)):
            options['errno'] = e.code
        if str(e) == 'TSocket read 0 bytes':
            options['error'] = 'Back-end service encountered a problem'
        if str(e).startswith('Could not connect to'):
            options['error'] = 'Back-end service is not running'
    return options
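
# The conversational-context entry generic_infer_route expects in
# Config.SESSION, with its shape inferred from the reads and writes above
# (the user id and stored values are hypothetical):
#
#   Config.SESSION['alice'] = {
#       'graph': services_needed,             # service graph from a prior turn
#       'data': {'text': ['previous query']},
#   }
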
def calc_average_value(temperatures, chem_pot_range, prop_array, prop_name,
                       Wm_array, _accuracy, options, temp_depend=False):
    """
    Calculates the average value of a system's property.
    """

    success = True
    error = ""

    log(__name__, "Calculating an average value of: %s" % (prop_name),
        options.verbose, indent=3)

    temp_len = len(temperatures)
    chem_pot_len = len(chem_pot_range)

    # Is the value temperature dependent?
    if not temp_depend:
        prop_len = len(prop_array)
    else:
        prop_len = len(prop_array[0])

    avg_array = np.zeros([temp_len, chem_pot_len], _accuracy)

    # For each temperature:
    for t_index in range(temp_len):
        temperature = temperatures[t_index]

        # For each chemical potential value:
        for mu_index in range(chem_pot_len):
            prop_avg = _accuracy(0.0)

            for prop_index in range(prop_len):
                wm_value = Wm_array[t_index, mu_index, prop_index]

                # Is the value temperature dependent?
                if not temp_depend:
                    prop_value = prop_array[prop_index]
                else:
                    prop_value = prop_array[t_index][prop_index]

                prop_avg += wm_value * prop_value

            avg_array[t_index, mu_index] = prop_avg

    return success, error, avg_array
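
# The triple loop above evaluates the ensemble average
#   <A>(T, mu) = sum_m W_m(T, mu) * A_m.
# A vectorised sketch with toy shapes (Wm indexed [temp, mu, m], and a
# temperature-independent property indexed [m]):
import numpy as np

Wm = np.random.rand(2, 3, 4)                       # hypothetical weights
prop = np.random.rand(4)                           # hypothetical property
avg = np.tensordot(Wm, prop, axes=([2], [0]))      # shape (2, 3): [temp, mu]
# For a temperature-dependent property prop_t of shape [temp, m]:
#   avg = np.einsum('tup,tp->tu', Wm, prop_t)
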
def distribution_analysis(chem_pot_multi, names, temperatures, chem_pot_range,
                          min_energies, delta_E_sums, experiment_cnts,
                          permutations, omega_c_arr, _accuracy, options):
    """
    Performs the distribution analysis: evaluates Wm and plots it against
    m and mu.
    """

    log(__name__, "Distribution analysis", options.verbose, indent=2)

    success = True
    error = ""

    # Preparing the Wm probabilities.
    Wm_array = prepare_Wm(chem_pot_multi, temperatures, chem_pot_range,
                          min_energies, delta_E_sums, experiment_cnts,
                          permutations, _accuracy, options)

    # Writing Wm into a file.
    success, error = IO.write_Wm(temperatures, chem_pot_range, chem_pot_multi,
                                 Wm_array)
    if not success:
        return success, error, Wm_array

    # Plotting the Wm probabilities as 3D contour plots.
    Graphs.wm_contour(temperatures, names, chem_pot_range, chem_pot_multi,
                      Wm_array, _accuracy, options)

    # Performing analysis with respect to the distribution function
    # (average m is the standard analysis).

    # Average m:
    average_analysis(temperatures, chem_pot_range, chem_pot_multi, "m",
                     Wm_array, _accuracy, options, temp_depend=False)

    # Average gamma:
    average_analysis(temperatures, chem_pot_range, omega_c_arr, r"\gamma^{c}",
                     Wm_array, _accuracy, options, temp_depend=True)

    return success, error, Wm_array
def learn_route():
    options = {}
    # Deal with POST requests.
    if request.method == 'POST':
        options = generic_learn_route(
            request.form['op'], request.form,
            request.files['file'] if 'file' in request.files else None)
    try:
        # Retrieve knowledge.
        options['pictures'] = database.get_images(session['username'])
        options['text'] = database.get_text(session['username'])
    except Exception as e:
        log(e)
        options['errno'] = 500
        options['error'] = str(e)
    return render_template('learn.html', **options)
def prepare_data(data, temperatures, _accuracy, options):
    """
    Prepares the data for further analysis: energy difference with respect
    to the corresponding lowest energy, Zcm sums.
    """

    # First of all, let's prepare the data for the calculations.
    log(__name__, "Preparing the data", options.verbose, indent=2)

    energies, min_energies, shifted_energies, experiment_cnts = \
        prepare_energies(data, _accuracy, options)

    delta_E_sums = prepare_delta_E_sums(shifted_energies, temperatures,
                                        _accuracy, options)

    return energies, min_energies, shifted_energies, experiment_cnts, \
        delta_E_sums
def _sendFinishedSignal(self, dest):
    """
    Sends a signal to the server that it has finished its job.
    """

    log(__name__, "Worker %000d | sending a FINISHED response" % (self.rank), 1)

    dataPackage = {
        Config._MPIRankTag: self.rank,
        Config._MPISignalDataTag: Config._MPISignalFinished
    }

    self.comm.send(dataPackage, dest=dest, tag=Config._MPITagClient)
def _sendReadyForDataSignal(self, dest):
    """
    Sends a signal to the server that it is ready to receive data.
    """

    log(__name__, "Worker %000d | sending a READY 4 DATA response" % (self.rank), 1)

    dataPackage = {
        Config._MPIRankTag: self.rank,
        Config._MPISignalDataTag: Config._MPISignalReady4Data
    }

    self.comm.send(dataPackage, dest=dest, tag=Config._MPITagClient)
def load(self, input_type, query_classes):
    current_dir = os.path.abspath(os.path.dirname(__file__))
    # If there are zero or one possible outcomes for the input type,
    # there is no need to train any classifier.
    if len(query_classes) <= 1:
        return DummyClassifier(query_classes.keys()[0])
    try:
        with open(current_dir + '/../models/dumped_classifier_' +
                  input_type + '.pkl', 'rb') as fid:
            log('Loading model for ' + input_type)
            return cPickle.load(fid)
    except IOError as e:
        print e
        exit(1)
def g_c_calc_omega(chem_pot_multi, temperatures, chem_pot_range, min_energies,
                   delta_E_sums, experiment_cnts, permutations, _accuracy,
                   options):
    """
    Calculates omega values with respect to temperature and chemical
    potential (grand canonical analysis).
    """

    success = True
    error = ""

    log(__name__, "Calculating Omega (grand canonical)", options.verbose,
        indent=3)

    temp_len = len(temperatures)
    chem_pot_len = len(chem_pot_range)
    chem_pot_multi_len = len(chem_pot_multi)

    global_min_energy = np.min(min_energies)

    omega_arr = np.zeros([temp_len, chem_pot_len], _accuracy)

    # For each temperature:
    for t_index in range(temp_len):
        temperature = temperatures[t_index]
        kT = np.longdouble(Constants.kB * temperature)

        # For each chemical potential value:
        for mu_index in range(chem_pot_len):
            mu_value = chem_pot_range[mu_index]

            sum2 = _accuracy(0.0)

            # For each composition:
            for m_index in range(chem_pot_multi_len):
                m_value = chem_pot_multi[m_index]
                min_energy = min_energies[m_index]

                exp_expr = _accuracy(
                    -1.0 * (min_energy - global_min_energy + m_value * mu_value) / kT)

                sum2 += (np.exp(exp_expr) *
                         (permutations[m_index] / experiment_cnts[m_index]) *
                         delta_E_sums[m_index][t_index])

            # The global minimum was factored out of the sum, so it re-enters
            # divided by kT inside the log-domain expression (cf. the
            # canonical case in c_calc_gamma).
            omega_value = -kT * (-global_min_energy / kT + np.log(sum2))

            omega_arr[t_index, mu_index] = omega_value

    return success, error, omega_arr
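
# Sketch of the algebra implemented above (module notation; assuming
# delta_E_sums[m][t] = sum_i exp(-(E_mi - E_min,m) / kT) as prepared earlier).
# The global minimum E_glob is factored out so every exponent stays O(1):
#
#   Xi = sum_m (P_m / N_m) * exp(-(E_min,m + m*mu) / kT) * delta_E_sums[m][t]
#      = exp(-E_glob / kT) * sum2
#
#   Omega = -kT * ln(Xi) = -kT * (-E_glob / kT + ln(sum2))
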
def _sendFailedSignal(self, dest, failedMessage):
    """
    Sends a signal to the server that the job has failed.
    """

    log(__name__, "Worker %000d | sending a FAILURE response: %s" %
        (self.rank, failedMessage), 1)

    dataPackage = {
        Config._MPIRankTag: self.rank,
        Config._MPISignalDataTag: Config._MPISignalFailed,
        Config._MPIMessageTag: failedMessage
    }

    self.comm.send(dataPackage, dest=dest, tag=Config._MPITagClient)
def predict(self, speech_input, image_input):
    input_type = ''
    if speech_input:
        if image_input:
            input_type = 'text_image'
        else:
            input_type = 'text'
    else:
        if image_input:
            input_type = 'image'
        else:
            raise RuntimeError('Text and image cannot be both empty')
    # Convert speech_input to a single-element list.
    class_predicted = self.classifiers[input_type].predict([speech_input])
    class_predicted = class_predicted[0]  # ndarray to string
    log('Query classified as ' + class_predicted)
    return self.CLASSIFIER_DESCRIPTIONS[input_type][class_predicted]
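
# A minimal usage sketch (the class name QueryClassifier and the query text
# are assumptions; CLASSIFIER_DESCRIPTIONS comes from the constructor below):
#
#   qc = QueryClassifier('load', Config.CLASSIFIER_DESCRIPTIONS)
#   graph = qc.predict('What is the speed of light?', None)
#   # input_type resolves to 'text'; the return value is the service graph
#   # registered for the predicted class.
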
def _sendStopSignal(self, dest):
    """
    Sends a signal to the server that it is stopping.
    """

    log(__name__, "Worker %000d | received a STOP signal" % (self.rank), 1)
    log(__name__, "Worker %000d | sending a QUIT response" % (self.rank), 1)

    # Sending a QUIT message to the server.
    dataPackage = {
        Config._MPIRankTag: self.rank,
        Config._MPISignalDataTag: Config._MPISignalQuit
    }

    self.comm.send(dataPackage, dest=dest, tag=Config._MPITagClient)
def __init__(self, TRAIN_OR_LOAD, CLASSIFIER_DESCRIPTIONS_IN):
    self.CLASSIFIER_DESCRIPTIONS = CLASSIFIER_DESCRIPTIONS_IN
    self.classifiers = {}
    # Each input type has its own classifier.
    for input_type in self.CLASSIFIER_DESCRIPTIONS:
        # query_classes represents all the possible classification outcomes
        # and their needed services for a given input type.
        if TRAIN_OR_LOAD == 'train':
            self.classifiers[input_type] = self.train(
                input_type, self.CLASSIFIER_DESCRIPTIONS[input_type])
        elif TRAIN_OR_LOAD == 'load':
            self.classifiers[input_type] = self.load(
                input_type, self.CLASSIFIER_DESCRIPTIONS[input_type])
        else:
            raise RuntimeError(
                'TRAIN_OR_LOAD must be either "train" or "load"')
    log('@@@@@ Summary of classifiers:')
    log(str(self.classifiers))
def c_calc_gamma(temperatures, min_energies, delta_E_sums, experiment_cnts,
                 permutations, _accuracy, options):
    """
    Calculates omega values with respect to temperature (canonical analysis).
    """

    success = True
    error = ""

    log(__name__, "Calculating Omega (canonical)", options.verbose, indent=3)

    temp_len = len(temperatures)
    comp_len = len(min_energies)

    omega_arr = np.zeros([temp_len, comp_len], _accuracy)

    # For each temperature:
    for t_index in range(temp_len):
        temperature = temperatures[t_index]
        kT = np.longdouble(Constants.kB * temperature)

        # For each composition:
        for c_index in range(comp_len):
            # Calculating Z^c_m.

            # Pm
            if options.permCalc:
                Pm = _accuracy(calc_permutation(m_value, mm_value, _accuracy))
            else:
                Pm = _accuracy(permutations[c_index])

            # Nm
            Nm = experiment_cnts[c_index]

            # Z_cm = np.exp(-1.0*(min_energies[c_index]) / kT) * (Pm/Nm) *
            #        delta_E_sums[c_index][t_index]
            # omega_value = -kT * np.log(Z_cm)
            # Evaluated in the log domain to avoid underflow:
            omega_value = -kT * (-1.0 * min_energies[c_index] / kT +
                                 np.log(Pm / Nm) +
                                 np.log(delta_E_sums[c_index][t_index]))

            omega_arr[t_index, c_index] = omega_value / Constants.gamma_c_m_coef

    return success, error, omega_arr
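
# A small numerical check (toy values) of the log-domain rewrite used above:
#   -kT * ln(exp(-E/kT) * (Pm/Nm) * S)
#     == -kT * (-E/kT + ln(Pm/Nm) + ln(S)),
# which avoids underflow of exp(-E/kT) when E/kT is large.
import numpy as np

kT, E, Pm, Nm, S = 0.025, 1.3, 120.0, 50.0, 2.7
direct = -kT * np.log(np.exp(-E / kT) * (Pm / Nm) * S)
log_domain = -kT * (-E / kT + np.log(Pm / Nm) + np.log(S))
assert np.isclose(direct, log_domain)
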
def infer_route():
    options = {}
    if os.environ.get('ASR_ADDR_PORT'):
        options['asr_addr_port'] = os.environ.get('ASR_ADDR_PORT')
    else:
        options['asr_addr_port'] = 'ws://localhost:' + port_dic["cmd_port"]
    try:
        # Deal with POST requests.
        if request.method == 'POST':
            form = request.form
            upload_file = request.files['file']
            print '@@@@@@@@@@', form['speech_input']
            # Classify the query.
            services_needed = \
                query_classifier.predict(form['speech_input'], upload_file)
            options['result'] = thrift_client.infer(
                session['username'], services_needed, form['speech_input'],
                upload_file.read())
            log('Result ' + options['result'])
            # Check if the Calendar service is needed.
            # If so, JavaScript needs to receive the parsed dates.
            if services_needed.has_service('CA'):
                options['dates'] = options['result']
                options['result'] = None
            return render_template('infer.html', **options)
        else:
            raise RuntimeError('Did you click the Ask button?')
    except Exception as e:
        log(e)
        if str(e) == 'TSocket read 0 bytes':
            e = 'Back-end service encountered a problem'
        options['error'] = e
        return render_template('infer.html', **options)
    # Display.
    return render_template('infer.html', **options)
def infer(self, LUCID, service_graph, text_data, image_data):
    # Create the list of QueryInput.
    query_input_list = []
    for node in service_graph.node_list:
        service = self.SERVICES[node.service_name]
        data = text_data if service.input_type == 'text' else image_data
        host, port = service.get_host_port()
        tag_list = [host, str(port), str(len(node.to_indices))]
        for to_index in node.to_indices:
            tag_list.append(str(to_index))
        query_input_list.append(self.create_query_input(
            service.input_type, data, tag_list))
    query_spec = self.create_query_spec('query', query_input_list)
    # Go through all starting indices and send requests.
    result = []
    for start_index in service_graph.starting_indices:
        service = self.SERVICES[service_graph.get_node(
            start_index).service_name]
        client, transport = self.get_client_transport(service)
        log('Sending infer request to ' + service.name)
        result.append(client.infer(str(LUCID), query_spec))
        transport.close()
    return ' '.join(result)
def count_images(self, username):
    log('Retrieving the number of images from images_' + username)
    return self.get_image_collection(username).count()
def __init__(self, SERVICES):
    self.SERVICES = SERVICES
    log('Pre-configured services: ' + str(SERVICES))
def get_text(self, username):
    log('Retrieving text from text_' + username)
    return [text for text in self.get_text_collection(username).find()]
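
# The log messages above imply a per-user collection scheme in the `lucida`
# database: text_<username> and images_<username>. A pymongo sketch of the
# assumed collection helpers (thin wrappers; names match the calls above):
from pymongo import MongoClient

db = MongoClient().lucida

def get_text_collection(username):
    return db['text_' + username]

def get_image_collection(username):
    return db['images_' + username]
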
def generic_learn_route(op, form, upload_file):
    options = {}
    username = session['username']
    try:
        # Add image knowledge.
        if op == 'add_image':
            image_type = 'image'
            label = form['label']
            # Check the uploaded image.
            if upload_file.filename == '':
                raise RuntimeError('Empty file is not allowed')
            check_image_extension(upload_file)
            # Check the label of the image.
            check_text_input(label)
            # Check whether the user can add one more image.
            database.check_add_image(username)
            # Generate the id.
            image_data = upload_file.read()
            image_id = hashlib.md5(
                username + str(datetime.datetime.now())).hexdigest()
            # Send the image to IMM.
            upload_file.close()
            thrift_client.learn_image(username, image_type, image_data,
                                      image_id)
            # Add the image into the database.
            database.add_image(username, image_data, label, image_id)
        # Delete image knowledge.
        elif op == 'delete_image':
            image_type = 'unlearn'
            image_id = form['image_id']
            # Send the unlearn request to IMM.
            thrift_client.learn_image(username, image_type, '', image_id)
            # Delete the image from the database.
            database.delete_image(username, image_id)
        # Add text knowledge.
        elif op == 'add_text' or op == 'add_url':
            text_type = 'text' if op == 'add_text' else 'url'
            text_data = form['knowledge']
            # Check the text knowledge.
            check_text_input(text_data)
            # Check whether the user can add one more piece of text.
            database.check_add_text(username)
            # Generate the id.
            text_id = hashlib.md5(
                username + text_data +
                str(datetime.datetime.now())).hexdigest()
            # Send the text to QA.
            thrift_client.learn_text(username, text_type, text_data, text_id)
            # Add the text knowledge into the database.
            database.add_text(username, text_type, text_data, text_id)
        # Delete text knowledge.
        elif op == 'delete_text':
            text_type = 'unlearn'
            text_id = form['text_id']
            # Send the unlearn request to QA.
            thrift_client.learn_text(username, text_type, '', text_id)
            # Delete the text from the database.
            database.delete_text(username, text_id)
        else:
            raise RuntimeError('Did you click the button?')
    except Exception as e:
        log(e)
        options['errno'] = 500
        options['error'] = str(e)
        # hasattr, not `'code' in e`: membership tests raise TypeError on
        # exception objects.
        if hasattr(e, 'code') and re.match(r"^4\d\d$", str(e.code)):
            options['errno'] = e.code
        if str(e) == 'TSocket read 0 bytes':
            options['error'] = 'Back-end service encountered a problem'
        if str(e).startswith('Could not connect to'):
            options['error'] = 'Back-end service is not running'
    return options
def train(self, input_type, query_classes):
    log('********************** ' + input_type + ' **********************')
    current_dir = os.path.abspath(os.path.dirname(__file__))
    # If there are zero or one possible outcomes for the input type,
    # there is no need to train any classifier.
    if len(query_classes) <= 1:
        return DummyClassifier(query_classes.keys()[0])
    # Build a DataFrame by going through all data files.
    data = DataFrame({'text': [], 'class': []})
    for query_class_name in query_classes:
        path = current_dir + '/../data/' + query_class_name + '.txt'
        log('Opening ' + path)
        lines = [line.rstrip('\n') for line in open(path)]
        rows = []
        index = []
        for text in lines:
            if text in index:
                log('duplicate in ' + path + ": " + text)
                exit(1)
            rows.append({'text': text, 'class': query_class_name})
            index.append(text)
        data = data.append(DataFrame(rows, index))
    # Build the pipeline.
    pipeline = Pipeline([
        ('count_vectorizer', CountVectorizer(ngram_range=(1, 2))),
        # ('classifier', PassiveAggressiveClassifier())
        ('classifier', LinearSVC())
    ])
    # Train and k-fold cross-validate. Introduce randomness.
    data = data.reindex(numpy.random.permutation(data.index))
    k_fold = KFold(n=len(data), n_folds=6)
    scores = []
    for train_indices, test_indices in k_fold:
        train_text = data.iloc[train_indices]['text'].values
        train_y = data.iloc[train_indices]['class'].values.astype(str)
        test_text = data.iloc[test_indices]['text'].values
        test_y = data.iloc[test_indices]['class'].values.astype(str)
        pipeline.fit(train_text, train_y)
        predictions = pipeline.predict(test_text)
        score = f1_score(test_y, predictions,
                         pos_label=None if len(query_classes) == 2 else 1,
                         average='weighted')
        scores.append(score)
    log('Total documents classified: ' + str(len(data)))
    log('Score: ' + str(sum(scores) / len(scores)))
    # Save the classifier.
    if not os.path.exists(current_dir + '/../models'):
        os.makedirs(current_dir + '/../models')
    with open(current_dir + '/../models/dumped_classifier_' +
              input_type + '.pkl', 'wb') as fid:
        log('Saving model for ' + input_type)
        cPickle.dump(pipeline, fid)
    return pipeline
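
# A minimal, self-contained sketch (toy data, hypothetical class names) of
# the same CountVectorizer + LinearSVC pipeline the trainer builds:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

pipeline = Pipeline([
    ('count_vectorizer', CountVectorizer(ngram_range=(1, 2))),
    ('classifier', LinearSVC()),
])
pipeline.fit(['what is the capital of italy', 'what does this picture show'],
             ['QA', 'IMM'])
print pipeline.predict(['what is the capital of france'])  # e.g. ['QA']
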
def learn_route():
    options = {}
    username = session['username']
    try:
        form = request.form
        # Deal with POST requests.
        if request.method == 'POST':
            # If the request does not contain an "op" field.
            if 'op' not in request.form:
                raise RuntimeError('Did you click the button?')
            # Add image knowledge.
            elif form['op'] == 'add_image':
                image_type = 'image'
                label = form['label']
                # Check the uploaded image.
                upload_file = request.files['file']
                if upload_file.filename == '':
                    raise RuntimeError('Empty file is not allowed')
                check_image_extension(upload_file)
                # Check the label of the image.
                check_text_input(label)
                # Check whether the user can add one more image.
                database.check_add_image(username)
                # Send the image to IMM.
                image_data = upload_file.read()
                upload_file.close()
                thrift_client.learn_image(username, image_type, image_data,
                                          label)
                # Add the image into the database.
                database.add_image(username, image_data, label)
            # Delete image knowledge.
            elif form['op'] == 'delete_image':
                image_type = 'unlearn'
                label = form['label']
                # Send the unlearn request to IMM.
                thrift_client.learn_image(username, image_type, '', label)
                # Delete the image from the database.
                database.delete_image(username, label)
            # Add text knowledge.
            elif form['op'] == 'add_text' or form['op'] == 'add_url':
                text_type = 'text' if form['op'] == 'add_text' else 'url'
                text_data = form['knowledge']
                # Check the text knowledge.
                check_text_input(text_data)
                # Check whether the user can add one more piece of text.
                database.check_add_text(username)
                # Generate the id.
                text_id = hashlib.md5(
                    username + text_data +
                    str(datetime.datetime.now())).hexdigest()
                # Send the text to QA.
                thrift_client.learn_text(username, text_type, text_data,
                                         text_id)
                # Add the text knowledge into the database.
                database.add_text(username, text_type, text_data, text_id)
            # Delete text knowledge.
            elif form['op'] == 'delete_text':
                text_type = 'unlearn'
                text_id = form['text_id']
                # Send the unlearn request to QA.
                thrift_client.learn_text(username, text_type, '', text_id)
                # Delete the text from the database.
                database.delete_text(username, text_id)
            else:
                raise RuntimeError('Did you click the button?')
    except Exception as e:
        log(e)
        if str(e) == 'TSocket read 0 bytes':
            e = 'Back-end service encountered a problem'
        options['error'] = e
    try:
        # Retrieve knowledge.
        options['pictures'] = database.get_images(username)
        options['text'] = database.get_text(username)
    except Exception as e:
        log(e)
        options['error'] = e
    return render_template('learn.html', **options)