def classifier_predict(request=None):
    """Predict against a trained classifier model.

    The JSON payload must carry "model", "subject_id" and "document"
    (a dict); "label" is optional.  Raises INVALID_ARGUMENTS when a
    required field is missing, blank, or of the wrong type.
    """
    payload = request.get_json()
    for required in ("model", "subject_id", "document"):
        if required not in payload:
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)
    model = payload["model"]
    subject_id = payload["subject_id"]
    document = payload["document"]
    # Blank identifiers or a non-dict document are rejected too.
    if not model.strip() or not subject_id.strip() \
            or type(document) is not dict:
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    label = payload.get("label")
    # This endpoint always asks for the single best prediction.
    best_prediction_only = True
    prediction = sqlpie.Classifier.predict(model, subject_id, document,
                                           label, best_prediction_only)
    return {'success': True, 'result': prediction}
def service_search(request):
    """Search endpoint: plain search, tag-cloud search, or geo search.

    Operator keys in the JSON payload select the mode; invalid operator
    combinations raise INVALID_ARGUMENTS.
    """
    payload = request.get_json()
    query = payload.get(sqlpie.Searcher.QUERY_OPERATOR, "")
    geo_radius = payload.get(sqlpie.Searcher.GEO_RADIUS_OPERATOR, "")

    tagcloud_mode = ""
    if sqlpie.Searcher.TAGCLOUD_OPERATOR in payload:
        tagcloud_mode = payload[sqlpie.Searcher.TAGCLOUD_OPERATOR].lower()
    geo_target = ""
    if sqlpie.Searcher.GEO_TARGET_OPERATOR in payload:
        geo_target = payload[sqlpie.Searcher.GEO_TARGET_OPERATOR].lower()
    geo_sort_by = sqlpie.Searcher.SORT_BY_DISTANCE
    if sqlpie.Searcher.GEO_SORT_BY in payload:
        geo_sort_by = payload[sqlpie.Searcher.GEO_SORT_BY].lower()

    num_results = 10
    if sqlpie.Searcher.NUM_RESULTS in payload:
        num_results = int(payload[sqlpie.Searcher.NUM_RESULTS])
    start_result = 0
    if sqlpie.Searcher.START_RESULT in payload:
        start_result = int(payload[sqlpie.Searcher.START_RESULT])

    # Validate the tag-cloud mode, if requested.
    is_tagcloud_search = False
    if tagcloud_mode:
        if tagcloud_mode not in [sqlpie.Searcher.SORT_TAGCLOUD_BY_RELEVANCE,
                                 sqlpie.Searcher.SORT_TAGCLOUD_BY_FREQUENCY]:
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)
        is_tagcloud_search = True

    # Validate the geo parameters, if either one was supplied: the radius
    # must be numeric, the target must be "lat,lon" with both parts
    # numeric, and the sort order must be one of the two known values.
    is_geo_search = False
    if geo_radius or geo_target:
        target_parts = geo_target.split(",")
        geo_valid = (sqlpie.Util.is_number(geo_radius) and bool(geo_radius)
                     and bool(geo_target) and len(target_parts) == 2
                     and sqlpie.Util.is_number(target_parts[0])
                     and sqlpie.Util.is_number(target_parts[1])
                     and geo_sort_by in [sqlpie.Searcher.SORT_BY_RELEVANCE,
                                         sqlpie.Searcher.SORT_BY_DISTANCE])
        if not geo_valid:
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)
        is_geo_search = True

    engine = sqlpie.Searcher(query)
    if is_tagcloud_search:
        results = engine.run_tagcloud(tagcloud_mode, num_results)
    elif is_geo_search:
        results = engine.run_geosearch(geo_radius, geo_target, num_results,
                                       start_result, geo_sort_by)
    else:
        results = engine.run_searcher(num_results, start_result)
    return {'success': True, 'results': results}
def classifier_clear(request=None):
    """Clear a classifier model named in the JSON payload.

    Raises INVALID_ARGUMENTS when "model" is absent or blank.
    """
    payload = request.get_json()
    if "model" not in payload:
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    model = payload["model"]
    if not model.strip():
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    sqlpie.Classifier.clear(model)
    return {'success': True}
def classifier_init(request=None):
    """Create a classifier model from "model", "subject_bucket", "predicate".

    All three fields must be present and non-blank, otherwise
    INVALID_ARGUMENTS is raised.
    """
    payload = request.get_json()
    required = ("model", "subject_bucket", "predicate")
    if any(field not in payload for field in required):
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    model = payload["model"]
    subject_bucket = payload["subject_bucket"]
    predicate = payload["predicate"]
    if any(not value.strip() for value in (model, subject_bucket, predicate)):
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    sqlpie.Classifier(model, subject_bucket, predicate)
    return {'success': True}
def load_data(filename):
    """Load a newline-delimited word list from the package data directory.

    Raises INVALID_STOPWORD_FILE when the file cannot be read.
    """
    path = os.path.dirname(__file__) + "/../data/" + filename
    try:
        with open(path) as handle:
            words = [entry.strip('\n') for entry in handle]
    except IOError:
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_STOPWORD_FILE)
    return words
def load():
    """Read and parse config/config.json relative to this module.

    Returns the parsed configuration object.  Raises
    INVALID_CONFIG_FILE_FORMAT when the file is not valid JSON.
    """
    config_path = os.path.dirname(__file__) + '/../../config/config.json'
    # Use a context manager so the handle is closed; the original opened
    # the file and never closed it.
    with open(config_path) as json_file:
        json_str = json_file.read()
    try:
        c = json.loads(json_str)
    except ValueError:
        # json.loads raises ValueError (JSONDecodeError is its subclass).
        # The original bare `except:` also swallowed unrelated errors such
        # as KeyboardInterrupt — catch only the JSON parse failure.
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_CONFIG_FILE_FORMAT)
    return c
def service_recommend(request):
    """Recommendation endpoint.

    Two valid payload shapes: (subject_bucket, subject_id, object_bucket,
    predicate) to recommend objects for a subject, or (subject_bucket,
    object_bucket, object_id, predicate) to recommend subjects for an
    object.  Optional "metric" ("pearson" or "manhattan", default
    "pearson") and integer "limit" (default 10).
    """
    json_data = request.get_json()
    if ("subject_bucket" in json_data and "subject_id" in json_data and
            "object_bucket" in json_data and "object_id" not in json_data and
            "predicate" in json_data):
        subject_bucket = json_data["subject_bucket"]
        object_bucket = json_data["object_bucket"]
        subject_id = json_data["subject_id"]
        object_id = None
        predicate = json_data["predicate"]
    elif ("object_bucket" in json_data and "object_id" in json_data and
            "subject_bucket" in json_data and "subject_id" not in json_data and
            "predicate" in json_data):
        subject_bucket = json_data["subject_bucket"]
        object_bucket = json_data["object_bucket"]
        object_id = json_data["object_id"]
        subject_id = None
        predicate = json_data["predicate"]
    else:
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    if "metric" in json_data:
        metric = json_data["metric"]
        if metric not in ("pearson", "manhattan"):
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)
    else:
        metric = "pearson"
    # BUG FIX: the original tested str(limit) == int(limit), which is
    # always False, so a caller-supplied limit was silently ignored and
    # the default of 10 always used.  Accept the limit when it is
    # integer-valued (same round-trip check service_summarization uses),
    # falling back to the default on non-numeric input.
    limit = 10
    if "limit" in json_data:
        try:
            if str(json_data["limit"]) == str(int(json_data["limit"])):
                limit = json_data["limit"]
        except (TypeError, ValueError):
            pass
    engine = sqlpie.Recommender(subject_bucket, object_bucket, subject_id,
                                object_id, predicate)
    results = engine.recommendation(limit, metric)
    return {'success': True, 'results': results}
def classifier_train(request=None):
    """Train a classifier model with the supplied features.

    The payload must include "model" and "features"; an optional
    "options" dict may enable using numbers as weights.
    """
    payload = request.get_json()
    if "model" not in payload:
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    if "features" not in payload:
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    use_numbers_as_weights = False
    if "options" in payload:
        option_set = payload["options"]
        if sqlpie.Classifier.USE_NUMBERS_AS_WEIGHTS_PARAM in option_set:
            use_numbers_as_weights = option_set[
                sqlpie.Classifier.USE_NUMBERS_AS_WEIGHTS_PARAM]
    model = payload["model"]
    features = payload["features"]
    if not model.strip():
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    sqlpie.Classifier.train(model, features, use_numbers_as_weights)
    return {'success': True}
def remove(self, key=None):
    """Remove one cached entry, or wipe the whole bucket when key is None.

    Raises CACHE_KEY_NOT_FOUND when the given key is not in the cache.
    Flushes afterwards when auto_flush is enabled.
    """
    with self.cache_lock:
        if key:
            key_id = sqlpie.Cache.convert_to_hash_key(key)
            # dict.has_key() was removed in Python 3; the `in` operator
            # behaves identically on Python 2 as well.
            if key_id in self.cache:
                del self.cache[key_id]
                self.dirty[key_id] = 0
            else:
                raise sqlpie.CustomException(
                    sqlpie.CustomException.CACHE_KEY_NOT_FOUND)
        else:
            # No key: drop the whole bucket and reset in-memory state.
            sqlpie.Cache.remove(self.bucket)
            self.cache = OrderedDict()
            self.dirty = {}
        if self.auto_flush:
            self.flush()
def __init__(self, observation=None, is_subject_id_encoded=False,
             is_object_id_encoded=False):
    """Build an observation record from a dict.

    Args:
        observation (dict): must contain "subject_id", "predicate" and
            "object_id"; optional keys: "value", "timestamp",
            "subject_bucket", "object_bucket".
        is_subject_id_encoded (bool): skip SHA1-encoding the subject id.
        is_object_id_encoded (bool): skip SHA1-encoding the object id.

    Raises INVALID_ARGUMENTS when a required key is missing.
    """
    # None sentinel instead of a mutable `{}` default argument.
    if observation is None:
        observation = {}
    if not all(k in observation
               for k in ("subject_id", "predicate", "object_id")):
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    self.subject_id = observation["subject_id"]
    self.predicate_val = observation["predicate"]
    self.object_id = observation["object_id"]
    self.predicate_value = None
    self.predicate_type = 0
    self.observation = observation
    self.is_compressed = False
    if "timestamp" in observation:
        self.created_at = sqlpie.Util.get_current_utc_from_timestamp(
            observation["timestamp"])
    else:
        self.created_at = sqlpie.Util.get_current_utc_timestamp()
    if "value" in observation:
        self.predicate_value = json.dumps(observation["value"])
        self.predicate_type = sqlpie.Predicate.convert_type(
            observation["value"])
    if "subject_bucket" in observation:
        self.subject_bucket = observation["subject_bucket"]
    else:
        self.subject_bucket = sqlpie.bucket.Bucket.DEFAULT
    if "object_bucket" in observation:
        self.object_bucket = observation["object_bucket"]
    else:
        # BUG FIX: the original assigned self.subject_bucket here, which
        # clobbered the subject bucket and left self.object_bucket unset,
        # causing an AttributeError on the to_sha1 call below.
        self.object_bucket = sqlpie.bucket.Bucket.DEFAULT
    self.subject_bucket_id = sqlpie.Util.to_sha1(self.subject_bucket)
    self.object_bucket_id = sqlpie.Util.to_sha1(self.object_bucket)
    if not is_subject_id_encoded:
        self.subject_id = sqlpie.Util.to_sha1(self.subject_id)
    if not is_object_id_encoded:
        self.object_id = sqlpie.Util.to_sha1(self.object_id)
def service_summarization(request):
    """Summarization endpoint.

    Either "bucket" plus a list of document ids in "documents", or (no
    bucket) a list of inline documents in "documents".  Optional
    "options" may carry integer tuning values and a list of fields to
    summarize.
    """
    json_data = request.get_json()
    if ("bucket" in json_data and "documents" in json_data and
            sqlpie.Predicate.convert_type(json_data["documents"], False) ==
            sqlpie.Predicate.IS_LIST):
        bucket = json_data["bucket"]
        document_ids = json_data["documents"]
        documents = []
    elif ("bucket" not in json_data and "documents" in json_data and
            sqlpie.Predicate.convert_type(json_data["documents"], False) ==
            sqlpie.Predicate.IS_LIST):
        bucket = None
        document_ids = None
        documents = json_data["documents"]
    else:
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    options = {}
    if "options" in json_data:
        json_options = json_data["options"]
        # Copy each integer option only when it round-trips through int()
        # unchanged (an int, or an integer-valued string).
        # BUG FIX: the original compared str(...) == int(...) for
        # "max_summary_percent" and "max_keywords" — always False, so
        # those two options were silently discarded.
        for int_option in ("max_sentences", "max_summary_size",
                           "max_summary_percent", "max_keywords"):
            if int_option in json_options and str(
                    json_options[int_option]) == str(
                        int(json_options[int_option])):
                options[int_option] = json_options[int_option]
        if "fields_to_summarize" in json_options:
            options["fields_to_summarize"] = json_options[
                "fields_to_summarize"]
    engine = sqlpie.Summarizer(bucket, document_ids, documents)
    results = engine.summarize(options)
    return {'success': True, 'results': results}
def __init__(self, document=None, parsers=None):
    """Wrap a raw document dict, assigning an id when one is absent.

    Args:
        document (dict): document payload; when it has no ID_FIELD a
            unique identifier is generated and written into it.
        parsers (list): names of parser modules under ../parsers whose
            source is compiled and kept in self.parsers.

    Raises INVALID_PARSER when a parser file cannot be read or compiled.
    """
    # BUG FIX: the original used mutable defaults (document={}, parsers=[])
    # and *mutated* document below (document[ID_FIELD] = ...), so the
    # shared default dict leaked an id between calls.  None sentinels
    # give every call a fresh object.
    if document is None:
        document = {}
    if parsers is None:
        parsers = []
    if sqlpie.Document.ID_FIELD in document:
        self.document_id = sqlpie.Util.to_sha1(
            unicode(document[sqlpie.Document.ID_FIELD]))
    else:
        unique_identifier = sqlpie.Util.get_unique_identifier()
        self.document_id = sqlpie.Util.to_sha1(unique_identifier)
        document[sqlpie.Document.ID_FIELD] = unique_identifier
    if sqlpie.Document.BUCKET_FIELD in document:
        self.bucket = document[sqlpie.Document.BUCKET_FIELD]
    else:
        self.bucket = sqlpie.Bucket.DEFAULT
    self.document = document
    self.id = None
    self.bucket_id = None
    self.is_compressed = None
    self.state = Document.IS_NOT_INDEXED
    self.created_at = None
    self.data = None
    self.parsers = {}
    for p in parsers:
        filename = os.path.dirname(
            os.path.realpath(__file__)) + "/../parsers/" + p + ".py"
        if os.path.isfile(filename):
            try:
                # `with` closes the handle; the original leaked it.
                with open(filename, "r") as source_file:
                    source = source_file.read()
                self.parsers[p] = compile(source, '<string>', 'exec')
            except Exception:
                # Any read/compile failure is reported as INVALID_PARSER
                # (the original used a bare except for the same purpose).
                raise sqlpie.CustomException(
                    sqlpie.CustomException.INVALID_PARSER)
def _conditions_to_sql(conditions):
    """Translate a conditions dict into SQL WHERE fragments.

    Args:
        conditions (dict): keys are logical field names (subject_bucket,
            subject_id, object_bucket, object_id, predicate, value,
            timestamp, options); values may be scalars, lists (IN
            clauses) or {"start": ..., "end": ...} range dicts.

    Returns:
        (sql_statements, sql_replacement, options) — a list of SQL
        condition strings with %s placeholders, the matching list of
        replacement values, and a {"limit", "offset"} paging dict.

    Raises INVALID_ARGUMENTS on an unknown condition key.
    """
    sql_statements = []
    sql_replacement = []
    # Fields whose values are stored as hashes and must be wrapped in
    # UNHEX(%s) after encoding.
    tokens_requiring_encoding = [
        "subject_bucket", "subject_id", "object_bucket", "object_id",
        "predicate"
    ]
    bucket_tokens = ["subject_bucket", "object_bucket"]
    predicate_token = ["predicate"]
    timestamp_token = ["created_at"]
    # Logical name -> physical column name.
    field_replacements = {
        "subject_bucket": "subject_bucket_id",
        "object_bucket": "object_bucket_id",
        "subject_id": "subject_id",
        "object_id": "object_id",
        "predicate": "predicate_id",
        "value": "predicate_value",
        "timestamp": "created_at",
        "options": "options"
    }
    valid_tokens = field_replacements.keys()
    options = {"limit": 10, "offset": 0}
    if "options" in conditions.keys():
        if "limit" in conditions["options"]:
            options["limit"] = conditions["options"]["limit"]
        if "offset" in conditions["options"]:
            options["offset"] = conditions["options"]["offset"]
    for k in conditions.keys():
        # NOTE(review): keys are lower-cased before lookup, so a
        # mixed-case input key would KeyError on conditions[k] below —
        # preserved as-is from the original.
        k = k.lower()
        if k in valid_tokens:
            v = conditions[k]
            v_type = type(v).__name__
            if v_type == "list":
                if len(v) > 0:
                    if k in tokens_requiring_encoding:
                        # Encode every list item and emit UNHEX(%s)
                        # placeholders for an IN (...) clause.
                        sql_string_list = []
                        for item in v:
                            if k in bucket_tokens:
                                b = sqlpie.Bucket(item)
                                lv = b.bucket_id
                            elif k in predicate_token:
                                p = sqlpie.Predicate(item)
                                lv = p.predicate_id
                            else:
                                lv = sqlpie.Term.get_key(item)
                            sql_string_list.append("UNHEX(%s)")
                            sql_replacement.append(lv)
                    else:
                        sql_string_list = []
                        for item in v:
                            sql_string_list.append("%s")
                            sql_replacement.append(item)
                    k = field_replacements[k]
                    sql_statements.append(
                        k + " in (" + ",".join(sql_string_list) + ")")
            else:
                if k in tokens_requiring_encoding:
                    # Scalar hashed field: encode and compare via UNHEX.
                    if k in bucket_tokens:
                        b = sqlpie.Bucket(v)
                        v = b.bucket_id
                    elif k in predicate_token:
                        p = sqlpie.Predicate(v)
                        v = p.predicate_id
                    else:
                        v = sqlpie.Term.get_key(v)
                    k = field_replacements[k]
                    sql_statements.append(k + " = UNHEX(%s)")
                    sql_replacement.append(v)
                else:
                    k = field_replacements[k]
                    if type(v).__name__ == "dict":
                        # Range condition: {"start": ..., "end": ...}.
                        # dict.has_key() was removed in Python 3; `in`
                        # behaves identically on Python 2 as well.
                        if "start" in v or "end" in v:
                            if "start" in v:
                                if k in timestamp_token:
                                    condition = k + " >= FROM_UNIXTIME(%s)"
                                else:
                                    condition = k + " >= %s"
                                sql_statements.append(condition)
                                sql_replacement.append(v["start"])
                            if "end" in v:
                                if k in timestamp_token:
                                    condition = k + " <= FROM_UNIXTIME(%s)"
                                else:
                                    condition = k + " <= %s"
                                sql_statements.append(condition)
                                sql_replacement.append(v["end"])
                    else:
                        sql_statements.append(k + " = %s")
                        sql_replacement.append(v)
        else:
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)
    return (sql_statements, sql_replacement, options)
def matching(request=None):
    """Document matching endpoint.

    Three modes, selected by which keys are present: a single stored
    document ("bucket" + "document_id" + "search_bucket"), every
    document in a bucket ("bucket" + "search_bucket"), or an inline
    document payload ("document" + "search_bucket").  Unknown keys or
    blank values raise INVALID_ARGUMENTS.
    """
    json_data = request.get_json()
    num_results = int(
        json_data["num_results"]) if "num_results" in json_data else 1
    filter_query = json_data[
        "filter_query"] if "filter_query" in json_data else ""
    output_predicate = json_data[
        "output_predicate"] if "output_predicate" in json_data else None
    valid_keys = [
        "num_results", "filter_query", "output_predicate", "bucket",
        "document_id", "search_bucket", "document"
    ]
    if not all(key in valid_keys for key in json_data.keys()):
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    if ("bucket" in json_data and "document_id" in json_data and
            "search_bucket" in json_data and
            json_data["bucket"].strip() and
            json_data["document_id"].strip() and
            json_data["search_bucket"].strip()):
        # Match one stored document against the search bucket.
        matcher = sqlpie.Matcher()
        matches = matcher.match_single(json_data["bucket"],
                                       json_data["document_id"],
                                       json_data["search_bucket"],
                                       num_results, filter_query)
        resp = {'success': True, 'results': matches}
    elif ("bucket" in json_data and "document_id" not in json_data and
            "search_bucket" in json_data and
            json_data["bucket"].strip() and
            json_data["search_bucket"].strip()):
        # Match every document in the bucket against the search bucket.
        matcher = sqlpie.Matcher()
        total_matches, output_predicate = matcher.match_all(
            json_data["bucket"], json_data["search_bucket"], num_results,
            filter_query, output_predicate)
        resp = {
            'success': True,
            'total_matches': total_matches,
            'output_predicate': output_predicate
        }
    elif ("document" in json_data and "search_bucket" in json_data and
            len(unicode(json_data["document"]).strip()) > 0 and
            len(json_data["search_bucket"].strip()) > 0):
        # Match an ad-hoc document payload against the search bucket.
        matcher = sqlpie.Matcher()
        matches = matcher.match_document(json_data["document"],
                                         json_data["search_bucket"],
                                         num_results, filter_query)
        resp = {'success': True, 'results': matches}
    else:
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    return resp