def classifier_predict(request=None):
    """Predict labels for a document using a trained classifier model.

    Expects a JSON payload with required keys "model", "subject_id" and
    "document" (a dict); "label" is optional.  Only the best prediction
    is requested from the classifier.

    Raises:
        sqlpie.CustomException: INVALID_ARGUMENTS when a required key is
            missing, a name is blank, or "document" is not a dict.
    """
    json_data = request.get_json()
    # All three keys are mandatory; reject the request if any is absent.
    for required_key in ("model", "subject_id", "document"):
        if required_key not in json_data:
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)
    model = json_data["model"]
    subject_id = json_data["subject_id"]
    document = json_data["document"]
    # model/subject_id must be non-blank strings and document a dict.
    if (not model.strip() or not subject_id.strip()
            or not isinstance(document, dict)):
        raise sqlpie.CustomException(
            sqlpie.CustomException.INVALID_ARGUMENTS)
    label = json_data.get("label")
    best_prediction_only = True
    prediction = sqlpie.Classifier.predict(model, subject_id, document,
                                           label, best_prediction_only)
    return {'success': True, 'result': prediction}
# --- Example #2 ---
    def service_search(request):
        """Run a search: plain text, tag cloud, or geo-radius search.

        Optional JSON keys (named by sqlpie.Searcher operator constants)
        select the search mode and paging.  Defaults: plain search, 10
        results starting at offset 0, geo results sorted by distance.

        Raises:
            sqlpie.CustomException: INVALID_ARGUMENTS on an unknown
                tag-cloud sort mode or malformed geo parameters.
        """
        json_data = request.get_json()
        query = json_data.get(sqlpie.Searcher.QUERY_OPERATOR, "")
        tagcloud_search = ""
        if sqlpie.Searcher.TAGCLOUD_OPERATOR in json_data:
            tagcloud_search = json_data[
                sqlpie.Searcher.TAGCLOUD_OPERATOR].lower()
        geo_radius_search = json_data.get(
            sqlpie.Searcher.GEO_RADIUS_OPERATOR, "")
        geo_target_search = ""
        if sqlpie.Searcher.GEO_TARGET_OPERATOR in json_data:
            geo_target_search = json_data[
                sqlpie.Searcher.GEO_TARGET_OPERATOR].lower()
        geo_sort_by = sqlpie.Searcher.SORT_BY_DISTANCE
        if sqlpie.Searcher.GEO_SORT_BY in json_data:
            geo_sort_by = json_data[sqlpie.Searcher.GEO_SORT_BY].lower()
        num_results = int(json_data.get(sqlpie.Searcher.NUM_RESULTS, 10))
        start_result = int(json_data.get(sqlpie.Searcher.START_RESULT, 0))

        is_tagcloud_search = False
        is_geo_search = False
        if tagcloud_search:
            if tagcloud_search not in (
                    sqlpie.Searcher.SORT_TAGCLOUD_BY_RELEVANCE,
                    sqlpie.Searcher.SORT_TAGCLOUD_BY_FREQUENCY):
                raise sqlpie.CustomException(
                    sqlpie.CustomException.INVALID_ARGUMENTS)
            is_tagcloud_search = True
        if geo_radius_search or geo_target_search:
            # A geo search needs a numeric radius plus a "lat,lon" target
            # and a recognized sort mode.
            target_parts = geo_target_search.split(",") if geo_target_search else []
            geo_params_ok = (
                geo_radius_search and
                sqlpie.Util.is_number(geo_radius_search) and
                len(target_parts) == 2 and
                sqlpie.Util.is_number(target_parts[0]) and
                sqlpie.Util.is_number(target_parts[1]) and
                geo_sort_by in (sqlpie.Searcher.SORT_BY_RELEVANCE,
                                sqlpie.Searcher.SORT_BY_DISTANCE))
            if not geo_params_ok:
                raise sqlpie.CustomException(
                    sqlpie.CustomException.INVALID_ARGUMENTS)
            is_geo_search = True

        # Tag-cloud mode wins when both tag-cloud and geo keys are present.
        engine = sqlpie.Searcher(query)
        if is_tagcloud_search:
            results = engine.run_tagcloud(tagcloud_search, num_results)
        elif is_geo_search:
            results = engine.run_geosearch(geo_radius_search,
                                           geo_target_search, num_results,
                                           start_result, geo_sort_by)
        else:
            results = engine.run_searcher(num_results, start_result)

        return {'success': True, 'results': results}
 def classifier_clear(request=None):
     """Clear the classifier model named in the request payload."""
     payload = request.get_json()
     if "model" not in payload:
         raise sqlpie.CustomException(
             sqlpie.CustomException.INVALID_ARGUMENTS)
     model = payload["model"]
     # A blank model name is as invalid as a missing one.
     if not model.strip():
         raise sqlpie.CustomException(
             sqlpie.CustomException.INVALID_ARGUMENTS)
     sqlpie.Classifier.clear(model)
     return {'success': True}
 def classifier_init(request=None):
     """Create a classifier model bound to a subject bucket and predicate."""
     payload = request.get_json()
     required = ("model", "subject_bucket", "predicate")
     # Every key must be present in the payload.
     if any(key not in payload for key in required):
         raise sqlpie.CustomException(
             sqlpie.CustomException.INVALID_ARGUMENTS)
     model = payload["model"]
     subject_bucket = payload["subject_bucket"]
     predicate = payload["predicate"]
     # None of the values may be blank.
     if any(len(value.strip()) == 0
            for value in (model, subject_bucket, predicate)):
         raise sqlpie.CustomException(
             sqlpie.CustomException.INVALID_ARGUMENTS)
     sqlpie.Classifier(model, subject_bucket, predicate)
     return {'success': True}
# --- Example #5 ---
 def load_data(filename):
     """Load a newline-delimited word list from the package data directory.

     Args:
         filename: file name under ../data/ relative to this module.

     Returns:
         List of lines with trailing newlines stripped.

     Raises:
         sqlpie.CustomException: INVALID_STOPWORD_FILE when the file
             cannot be opened.
     """
     path = os.path.join(os.path.dirname(__file__), "..", "data", filename)
     try:
         with open(path) as f:
             words = [line.strip('\n') for line in f]
     except IOError:
         raise sqlpie.CustomException(
             sqlpie.CustomException.INVALID_STOPWORD_FILE)
     return words
# --- Example #6 ---
 def load():
     """Read and parse the service's JSON configuration file.

     Returns:
         The parsed configuration object.

     Raises:
         sqlpie.CustomException: INVALID_CONFIG_FILE_FORMAT when the file
             does not contain valid JSON.
     """
     config_path = os.path.join(
         os.path.dirname(__file__), '..', '..', 'config', 'config.json')
     # Use a context manager so the handle is closed deterministically
     # (the original leaked the open file object).
     with open(config_path) as json_file:
         json_str = json_file.read()
     try:
         c = json.loads(json_str)
     # json.loads raises ValueError on malformed input; a bare except
     # would also mask unrelated bugs (KeyboardInterrupt, typos, ...).
     except ValueError:
         raise sqlpie.CustomException(
             sqlpie.CustomException.INVALID_CONFIG_FILE_FORMAT)
     return c
    def service_recommend(request):
        """Recommend objects for a subject, or subjects for an object.

        The payload must carry "subject_bucket", "object_bucket" and
        "predicate", plus exactly one of "subject_id" / "object_id" —
        the missing side is what gets recommended.

        Optional keys: "metric" ("pearson" or "manhattan", default
        "pearson") and "limit" (integer, default 10).

        Raises:
            sqlpie.CustomException: INVALID_ARGUMENTS for a malformed
                payload or unknown metric.
        """
        json_data = request.get_json()
        has_subject = "subject_id" in json_data
        has_object = "object_id" in json_data
        has_base_keys = ("subject_bucket" in json_data and
                         "object_bucket" in json_data and
                         "predicate" in json_data)
        # Exactly one of subject_id / object_id may be present.
        if has_base_keys and has_subject and not has_object:
            subject_id = json_data["subject_id"]
            object_id = None
        elif has_base_keys and has_object and not has_subject:
            object_id = json_data["object_id"]
            subject_id = None
        else:
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)
        subject_bucket = json_data["subject_bucket"]
        object_bucket = json_data["object_bucket"]
        predicate = json_data["predicate"]

        metric = json_data.get("metric", "pearson")
        if metric not in ("pearson", "manhattan"):
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)

        # Bug fix: the original compared str(limit) against int(limit),
        # which is never equal, so a caller-supplied limit was always
        # ignored in favor of the default 10.
        if "limit" in json_data and str(json_data["limit"]) == str(
                int(json_data["limit"])):
            limit = json_data["limit"]
        else:
            limit = 10

        engine = sqlpie.Recommender(subject_bucket, object_bucket, subject_id,
                                    object_id, predicate)
        results = engine.recommendation(limit, metric)
        return {'success': True, 'results': results}
    def classifier_train(request=None):
        """Train a classifier model from the feature payload."""
        payload = request.get_json()
        # Both keys are required; check them in the documented order.
        for key in ("model", "features"):
            if key not in payload:
                raise sqlpie.CustomException(
                    sqlpie.CustomException.INVALID_ARGUMENTS)

        use_numbers_as_weights = False
        if "options" in payload:
            opts = payload["options"]
            weight_key = sqlpie.Classifier.USE_NUMBERS_AS_WEIGHTS_PARAM
            if weight_key in opts:
                use_numbers_as_weights = opts[weight_key]

        model = payload["model"]
        features = payload["features"]
        # The model name must not be blank.
        if not model.strip():
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)
        sqlpie.Classifier.train(model, features, use_numbers_as_weights)
        return {'success': True}
# --- Example #9 ---
 def remove(self, key=None):
     """Remove one cached entry by key, or drop the whole bucket cache.

     Args:
         key: cache key to delete; when falsy, the entire bucket cache is
             removed and the local state is reset.

     Raises:
         sqlpie.CustomException: CACHE_KEY_NOT_FOUND when the key is not
             present in the cache.
     """
     with self.cache_lock:
         if key:
             key_id = sqlpie.Cache.convert_to_hash_key(key)
             # dict.has_key() was removed in Python 3; `in` is the
             # equivalent (and idiomatic) membership test.
             if key_id in self.cache:
                 del self.cache[key_id]
                 # Record the deletion so it can be persisted on flush.
                 self.dirty[key_id] = 0
             else:
                 raise sqlpie.CustomException(
                     sqlpie.CustomException.CACHE_KEY_NOT_FOUND)
         else:
             sqlpie.Cache.remove(self.bucket)
             self.cache = OrderedDict()
             self.dirty = {}
     if self.auto_flush:
         self.flush()
# --- Example #10 ---
    def __init__(self,
                 observation=None,
                 is_subject_id_encoded=False,
                 is_object_id_encoded=False):
        """Build an observation record from a subject/predicate/object dict.

        Args:
            observation: dict with required keys "subject_id", "predicate"
                and "object_id"; optional "value", "timestamp",
                "subject_bucket" and "object_bucket".
            is_subject_id_encoded: when False, subject_id is SHA1-hashed.
            is_object_id_encoded: when False, object_id is SHA1-hashed.

        Raises:
            sqlpie.CustomException: INVALID_ARGUMENTS when a required key
                is missing.
        """
        # Avoid a shared mutable default argument.
        if observation is None:
            observation = {}
        if not all(k in observation
                   for k in ("subject_id", "predicate", "object_id")):
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)

        self.subject_id = observation["subject_id"]
        self.predicate_val = observation["predicate"]
        self.object_id = observation["object_id"]

        self.predicate_value = None
        self.predicate_type = 0
        self.observation = observation
        self.is_compressed = False

        if "timestamp" in observation:
            self.created_at = sqlpie.Util.get_current_utc_from_timestamp(
                observation["timestamp"])
        else:
            self.created_at = sqlpie.Util.get_current_utc_timestamp()

        if "value" in observation:
            self.predicate_value = json.dumps(observation["value"])
            self.predicate_type = sqlpie.Predicate.convert_type(
                observation["value"])

        self.subject_bucket = observation.get("subject_bucket",
                                              sqlpie.bucket.Bucket.DEFAULT)
        # Bug fix: the original's fallback assigned self.subject_bucket
        # here, leaving self.object_bucket undefined and crashing the
        # to_sha1 call below whenever "object_bucket" was absent.
        self.object_bucket = observation.get("object_bucket",
                                             sqlpie.bucket.Bucket.DEFAULT)

        self.subject_bucket_id = sqlpie.Util.to_sha1(self.subject_bucket)
        self.object_bucket_id = sqlpie.Util.to_sha1(self.object_bucket)

        if not is_subject_id_encoded:
            self.subject_id = sqlpie.Util.to_sha1(self.subject_id)
        if not is_object_id_encoded:
            self.object_id = sqlpie.Util.to_sha1(self.object_id)
    def service_summarization(request):
        """Summarize stored documents (by id) or documents supplied inline.

        The payload either names a "bucket" plus a "documents" list of
        document ids, or (with no "bucket") supplies the documents
        themselves under "documents".

        Optional "options": max_sentences, max_summary_size,
        max_summary_percent, max_keywords (integer-valued) and
        fields_to_summarize.

        Raises:
            sqlpie.CustomException: INVALID_ARGUMENTS for a malformed
                payload.
        """
        json_data = request.get_json()
        documents_are_list = (
            "documents" in json_data and
            sqlpie.Predicate.convert_type(json_data["documents"], False) ==
            sqlpie.Predicate.IS_LIST)
        if documents_are_list and "bucket" in json_data:
            # Summarize stored documents identified by id.
            bucket = json_data["bucket"]
            document_ids = json_data["documents"]
            documents = []
        elif documents_are_list:
            # No bucket: the payload carries the documents themselves.
            bucket = None
            document_ids = None
            documents = json_data["documents"]
        else:
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)

        options = {}
        if "options" in json_data:
            json_options = json_data["options"]
            # Copy each numeric option only when it is integer-valued.
            # Bug fix: the original compared str(...) against int(...) for
            # max_summary_percent and max_keywords (never equal), so those
            # options were silently dropped.
            for int_option in ("max_sentences", "max_summary_size",
                               "max_summary_percent", "max_keywords"):
                if int_option in json_options and str(
                        json_options[int_option]) == str(
                            int(json_options[int_option])):
                    options[int_option] = json_options[int_option]
            if "fields_to_summarize" in json_options:
                options["fields_to_summarize"] = json_options[
                    "fields_to_summarize"]

        engine = sqlpie.Summarizer(bucket, document_ids, documents)
        results = engine.summarize(options)
        return {'success': True, 'results': results}
# --- Example #12 ---
    def __init__(self, document=None, parsers=None):
        """Wrap a raw document dict and precompile any named parsers.

        Args:
            document: document payload dict; an ID_FIELD is generated and
                injected when missing, and the bucket defaults to
                sqlpie.Bucket.DEFAULT when BUCKET_FIELD is absent.
            parsers: names of parser scripts under ../parsers/ to compile.

        Raises:
            sqlpie.CustomException: INVALID_PARSER when a parser file
                cannot be read or compiled.
        """
        # Mutable default arguments are shared across calls; the original
        # mutated the default dict when injecting a generated id, leaking
        # state between instances.
        if document is None:
            document = {}
        if parsers is None:
            parsers = []

        if sqlpie.Document.ID_FIELD in document:
            self.document_id = sqlpie.Util.to_sha1(
                unicode(document[sqlpie.Document.ID_FIELD]))
        else:
            # No caller-supplied id: generate one and inject it back.
            unique_identifier = sqlpie.Util.get_unique_identifier()
            self.document_id = sqlpie.Util.to_sha1(unique_identifier)
            document[sqlpie.Document.ID_FIELD] = unique_identifier

        if sqlpie.Document.BUCKET_FIELD in document:
            self.bucket = document[sqlpie.Document.BUCKET_FIELD]
        else:
            self.bucket = sqlpie.Bucket.DEFAULT

        self.document = document
        self.id = None
        self.bucket_id = None
        self.is_compressed = None
        self.state = Document.IS_NOT_INDEXED
        self.created_at = None
        self.data = None

        self.parsers = {}
        for p in parsers:
            filename = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "..", "parsers", p + ".py")
            if os.path.isfile(filename):
                try:
                    # Close the handle deterministically (the original
                    # leaked the open file object).
                    with open(filename, "r") as source_file:
                        source = source_file.read()
                    code = compile(source, '<string>', 'exec')
                    self.parsers[p] = code
                # Narrowed from a bare except so unrelated bugs still
                # surface; compile() raises SyntaxError/ValueError.
                except (IOError, SyntaxError, TypeError, ValueError):
                    raise sqlpie.CustomException(
                        sqlpie.CustomException.INVALID_PARSER)
# --- Example #13 ---
    def _conditions_to_sql(conditions):
        """Translate a conditions dict into parameterized SQL fragments.

        Args:
            conditions: mapping of query tokens (subject_bucket,
                subject_id, object_bucket, object_id, predicate, value,
                timestamp, options) to a scalar, a list, or a
                {"start": ..., "end": ...} range dict.

        Returns:
            Tuple (sql_statements, sql_replacement, options):
            sql_statements holds "column op %s" fragments,
            sql_replacement the matching bind values, and options the
            limit/offset paging (defaults 10 / 0).

        Raises:
            sqlpie.CustomException: INVALID_ARGUMENTS for an unknown token.
        """
        sql_statements = []
        sql_replacement = []
        # These tokens are stored hashed, so their values are encoded and
        # bound through UNHEX().
        tokens_requiring_encoding = [
            "subject_bucket", "subject_id", "object_bucket", "object_id",
            "predicate"
        ]
        bucket_tokens = ["subject_bucket", "object_bucket"]
        predicate_token = ["predicate"]
        timestamp_token = ["created_at"]
        # Maps public token names to the actual column names.
        field_replacements = {
            "subject_bucket": "subject_bucket_id",
            "object_bucket": "object_bucket_id",
            "subject_id": "subject_id",
            "object_id": "object_id",
            "predicate": "predicate_id",
            "value": "predicate_value",
            "timestamp": "created_at",
            "options": "options"
        }
        valid_tokens = field_replacements.keys()

        options = {"limit": 10, "offset": 0}
        if "options" in conditions:
            if "limit" in conditions["options"]:
                options["limit"] = conditions["options"]["limit"]
            if "offset" in conditions["options"]:
                options["offset"] = conditions["options"]["offset"]

        for k in conditions:
            k = k.lower()
            # Guard clause: reject unknown tokens immediately.
            if k not in valid_tokens:
                raise sqlpie.CustomException(
                    sqlpie.CustomException.INVALID_ARGUMENTS)
            v = conditions[k]
            if isinstance(v, list):
                if len(v) > 0:
                    sql_string_list = []
                    if k in tokens_requiring_encoding:
                        for item in v:
                            if k in bucket_tokens:
                                lv = sqlpie.Bucket(item).bucket_id
                            elif k in predicate_token:
                                lv = sqlpie.Predicate(item).predicate_id
                            else:
                                lv = sqlpie.Term.get_key(item)
                            sql_string_list.append("UNHEX(%s)")
                            sql_replacement.append(lv)
                    else:
                        for item in v:
                            sql_string_list.append("%s")
                            sql_replacement.append(item)
                    k = field_replacements[k]
                    sql_statements.append(k + " in (" +
                                          ",".join(sql_string_list) + ")")
            else:
                if k in tokens_requiring_encoding:
                    if k in bucket_tokens:
                        v = sqlpie.Bucket(v).bucket_id
                    elif k in predicate_token:
                        v = sqlpie.Predicate(v).predicate_id
                    else:
                        v = sqlpie.Term.get_key(v)
                    k = field_replacements[k]
                    sql_statements.append(k + " = UNHEX(%s)")
                    sql_replacement.append(v)
                else:
                    k = field_replacements[k]
                    if isinstance(v, dict):
                        # Range condition. dict.has_key() was removed in
                        # Python 3; `in` is the equivalent test.
                        if "start" in v:
                            if k in timestamp_token:
                                condition = k + " >= FROM_UNIXTIME(%s)"
                            else:
                                condition = k + " >= %s"
                            sql_statements.append(condition)
                            sql_replacement.append(v["start"])
                        if "end" in v:
                            if k in timestamp_token:
                                condition = k + " <= FROM_UNIXTIME(%s)"
                            else:
                                condition = k + " <= %s"
                            sql_statements.append(condition)
                            sql_replacement.append(v["end"])
                    else:
                        sql_statements.append(k + " = %s")
                        sql_replacement.append(v)
        return (sql_statements, sql_replacement, options)
# --- Example #14 ---
    def matching(request=None):
        """Match documents against a search bucket.

        Three modes, selected by which keys the payload carries:
        a single stored document (bucket + document_id + search_bucket),
        all documents in a bucket (bucket + search_bucket, results stored
        under an output predicate), or an ad-hoc inline document
        (document + search_bucket).

        Optional keys: num_results (default 1), filter_query (default ""),
        output_predicate (default None).

        Raises:
            sqlpie.CustomException: INVALID_ARGUMENTS on unknown keys or
                blank/missing required values.
        """
        json_data = request.get_json()
        results = []
        if "num_results" in json_data:
            num_results = int(json_data["num_results"])
        else:
            num_results = 1
        filter_query = json_data.get("filter_query", "")
        output_predicate = json_data.get("output_predicate")

        valid_keys = [
            "num_results", "filter_query", "output_predicate", "bucket",
            "document_id", "search_bucket", "document"
        ]
        # Reject any payload carrying a key outside the known set.
        if not all(k in valid_keys for k in json_data):
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)
        elif ("bucket" in json_data and "document_id" in json_data and
              "search_bucket" in json_data and
              json_data["bucket"].strip() and
              json_data["document_id"].strip() and
              json_data["search_bucket"].strip()):
            # Mode 1: match one stored document.
            matcher = sqlpie.Matcher()
            results = matcher.match_single(json_data["bucket"],
                                           json_data["document_id"],
                                           json_data["search_bucket"],
                                           num_results, filter_query)
            resp = {'success': True, 'results': results}
        elif ("bucket" in json_data and "document_id" not in json_data and
              "search_bucket" in json_data and
              json_data["bucket"].strip() and
              json_data["search_bucket"].strip()):
            # Mode 2: match every document in the bucket; matches are
            # recorded under an output predicate rather than returned.
            matcher = sqlpie.Matcher()
            total_matches, output_predicate = matcher.match_all(
                json_data["bucket"], json_data["search_bucket"],
                num_results, filter_query, output_predicate)
            resp = {
                'success': True,
                'total_matches': total_matches,
                'output_predicate': output_predicate
            }
        elif ("document" in json_data and "search_bucket" in json_data and
              len(unicode(json_data["document"]).strip()) > 0 and
              len(json_data["search_bucket"].strip()) > 0):
            # Mode 3: match an ad-hoc document supplied inline.
            matcher = sqlpie.Matcher()
            results = matcher.match_document(json_data["document"],
                                             json_data["search_bucket"],
                                             num_results, filter_query)
            resp = {'success': True, 'results': results}
        else:
            raise sqlpie.CustomException(
                sqlpie.CustomException.INVALID_ARGUMENTS)

        return resp