コード例 #1
0
ファイル: DB.py プロジェクト: katelaurel/PhiloLogic4
 def get_all(self, philo_type="doc", sort_order=["rowid"]):
     """Return a hitlist covering every object of type `philo_type`.

     The hitlist lives in ``<self.path>/hitlists/`` under a name derived
     from the SHA-1 of the database path followed by the object type.
     If that file exists it is opened directly as a HitList (`sort_order`
     is not used on this path); otherwise a metadata query on
     ``philo_type`` is started with the file name and `sort_order`.
     """
     digest = hashlib.sha1()
     for part in (self.path, philo_type):
         digest.update(part)
     cache_path = self.path + "/hitlists/%s.hitlist" % digest.hexdigest()

     if os.path.isfile(cache_path):
         # Already on disk from an earlier call: reuse it.
         return HitList.HitList(cache_path, 0, self)

     # Not cached yet. metadata_query is handed the target file name
     # (presumably it writes the corpus file there -- confirm in MetadataQuery).
     type_filter = {"philo_type": ['"%s"' % philo_type]}
     return MetadataQuery.metadata_query(self, cache_path, [type_filter], sort_order)
コード例 #2
0
ファイル: DB.py プロジェクト: waltms/libphilo
 def get_all(self, philo_type="doc"):
     """Return a hitlist covering every object of type `philo_type`.

     The hitlist lives in ``/var/lib/philologic/hitlists/`` under a name
     derived from the SHA-1 of the database path followed by the object
     type. If that file exists it is opened directly as a HitList;
     otherwise a metadata query on ``philo_type`` is started with the
     file name.
     """
     digest = hashlib.sha1()
     for part in (self.path, philo_type):
         digest.update(part)
     cache_path = "/var/lib/philologic/hitlists/%s.hitlist" % digest.hexdigest()

     if os.path.isfile(cache_path):
         # Already on disk from an earlier call: reuse it.
         return HitList.HitList(cache_path, 0, self)

     # Not cached yet. metadata_query is handed the target file name
     # (presumably it writes the corpus file there -- confirm in MetadataQuery).
     type_filter = {"philo_type": ['"%s"' % philo_type]}
     return MetadataQuery.metadata_query(self, cache_path, [type_filter])
コード例 #3
0
ファイル: DB.py プロジェクト: waltms/libphilo
    def query(self, qs="", method="", method_arg=0, limit=10000000, **metadata):
        """Query the PhiloLogic database.

        Keyword arguments in `metadata` with non-empty values restrict the
        search to a corpus; `qs` is the search string. Both the corpus and
        the search result are cached as hitlist files under
        ``/var/lib/philologic/hitlists/``, named by a running SHA-1 over the
        database path, the metadata filters and, for searches, `qs`,
        `method`, `method_arg` and `limit`.

        Returns:
            - the corpus hitlist when metadata was given and its length is 0;
            - with `qs`: the cached search HitList, or the result of
              Query.query when no cached file exists;
            - without `qs`: the corpus if it is truthy, otherwise
              ``self.get_all("doc")``.
        """
        digest = hashlib.sha1()
        digest.update(self.path)
        corpus = None
        corpus_file = None
        has_metadata = False

        # Wrap bare strings into one-element lists (stored back into
        # `metadata`) and fold every filter that has non-empty terms into
        # the digest.
        for field, terms in metadata.items():
            if isinstance(terms, str) and terms != "":
                terms = [terms]
                metadata[field] = terms
            kept = [term for term in terms if term]
            if not kept:
                continue
            has_metadata = True
            digest.update("%s=%s" % (field, "|".join(kept)))

        if has_metadata:
            corpus_file = "/var/lib/philologic/hitlists/%s.hitlist" % digest.hexdigest()
            if os.path.isfile(corpus_file):
                corpus = HitList.HitList(corpus_file, 0, self)
                corpus.finish()
            else:
                # Sort each filter into the dictionary of the hierarchy level
                # it belongs to: one dictionary per level.
                hierarchy = self.locals["metadata_hierarchy"]
                per_level = [{} for _ in hierarchy]
                for field, terms in metadata.items():
                    if not terms:
                        continue
                    for depth, level_fields in enumerate(hierarchy):
                        if field not in level_fields:
                            continue
                        per_level[depth][field] = terms
                        field_types = self.locals["metadata_types"]
                        if field in field_types:
                            field_type = field_types[field]
                            if field_type == "div":
                                # "div" stands for all div variants.
                                type_pattern = '"div"|"div1"|"div2"|"div3"'
                            else:
                                type_pattern = '"%s"' % field_type
                            per_level[depth]["philo_type"] = [type_pattern]
                populated = [entry for entry in per_level if entry]
                corpus = MetadataQuery.metadata_query(self, corpus_file, populated)
            print >> sys.stderr, "corpus file of length %d" % len(corpus)
            if len(corpus) == 0:
                return corpus

        if not qs:
            # No search string: the answer is the corpus itself, or every
            # document when there is no usable corpus.
            if corpus:
                return corpus
            return self.get_all("doc")

        words_per_hit = len(qs.split(" "))
        # The search digest continues from the corpus digest state.
        for piece in (qs, method, str(method_arg), str(limit)):
            digest.update(piece)
        search_file = "/var/lib/philologic/hitlists/%s.hitlist" % digest.hexdigest()
        if os.path.isfile(search_file):
            return HitList.HitList(search_file, words_per_hit, self)
        return Query.query(self, qs, corpus_file, self.width, method, method_arg, limit,
                           filename=search_file)
コード例 #4
0
ファイル: DB.py プロジェクト: katelaurel/PhiloLogic4
    def query(self, qs="", method="", method_arg="", limit="", sort_order=["rowid"], **metadata):
        """Query the PhiloLogic database.

        Args:
            qs: search string; when empty only the metadata corpus is used.
            method: search method name; an empty value becomes "proxy".
            method_arg: int, or a string holding one. A string that does not
                parse as an int becomes 6 for "cooc"/"sentence" and 0 for
                any other method.
            limit: int, or a string holding one. A string that does not
                parse as an int becomes 10000000.
            sort_order: passed unchanged to MetadataQuery.metadata_query and
                get_all; HitList and Query.query receive None instead when
                it equals ["rowid"].
            **metadata: metadata filters; values may be a string or a list
                of strings. Empty values are ignored.

        Corpus and search results are cached as hitlist files under
        ``<self.path>/hitlists/``, named by a running SHA-1 over the database
        path, the metadata filters and, for searches, `qs`, `method`,
        `method_arg` and `limit`.

        Returns:
            - the corpus hitlist when metadata was given and its length is 0;
            - with `qs`: the cached search HitList, or the result of
              Query.query when no cached file exists;
            - without `qs`: the corpus if it is truthy, otherwise
              ``self.get_all("doc", sort_order)``.
        """
        method = method or "proxy"
        if isinstance(method_arg, str):
            try:
                method_arg = int(method_arg)
            except ValueError:
                # Blank or non-numeric: fall back to the per-method default.
                method_arg = 6 if method in ("cooc", "sentence") else 0

        if isinstance(limit, str):
            try:
                limit = int(limit)
            except ValueError:
                limit = 10000000

        # Computed once and never written back to `sort_order`, so the
        # caller's value still reaches metadata_query and get_all.
        hitlist_sort = None if sort_order == ["rowid"] else sort_order

        digest = hashlib.sha1()
        digest.update(self.path)
        corpus = None
        corpus_file = None
        has_metadata = False

        # Wrap bare strings into one-element lists (stored back into
        # `metadata`) and fold every filter that has non-empty terms into
        # the digest.
        for field, terms in metadata.items():
            if isinstance(terms, str) and terms != "":
                terms = [terms]
                metadata[field] = terms
            kept = [term for term in terms if term]
            if not kept:
                continue
            has_metadata = True
            digest.update("%s=%s" % (field, "|".join(kept)))

        if has_metadata:
            corpus_file = self.path + "/hitlists/%s.hitlist" % digest.hexdigest()
            if os.path.isfile(corpus_file):
                corpus = HitList.HitList(corpus_file, 0, self, sort_order=hitlist_sort)
                corpus.finish()
            else:
                # Sort each filter into the dictionary of the hierarchy level
                # it belongs to: one dictionary per level.
                hierarchy = self.locals["metadata_hierarchy"]
                per_level = [{} for _ in hierarchy]
                for field, terms in metadata.items():
                    if not terms:
                        continue
                    for depth, level_fields in enumerate(hierarchy):
                        if field not in level_fields:
                            continue
                        per_level[depth][field] = terms
                        field_types = self.locals["metadata_types"]
                        if field in field_types:
                            field_type = field_types[field]
                            if field_type == "div":
                                # "div" stands for all div variants.
                                type_pattern = '"div"|"div1"|"div2"|"div3"'
                            else:
                                type_pattern = '"%s"' % field_type
                            per_level[depth]["philo_type"] = [type_pattern]
                metadata_dicts = [entry for entry in per_level if entry]
                if "philo_id" in metadata:
                    # philo_id goes on the last populated level, or on a
                    # level of its own when no other filter matched.
                    if not metadata_dicts:
                        metadata_dicts.append({})
                    metadata_dicts[-1]["philo_id"] = metadata["philo_id"]
                corpus = MetadataQuery.metadata_query(self, corpus_file, metadata_dicts, sort_order)
            if len(corpus) == 0:
                return corpus

        if not qs:
            # No search string: the answer is the corpus itself, or every
            # document when there is no usable corpus.
            if corpus:
                return corpus
            return self.get_all("doc", sort_order)

        # The search digest continues from the corpus digest state.
        for piece in (qs, method, str(method_arg), str(limit)):
            digest.update(piece)
        search_file = self.path + "/hitlists/%s.hitlist" % digest.hexdigest()
        if not os.path.isfile(search_file):
            return Query.query(self, qs, corpus_file, self.width, method, method_arg, limit,
                               filename=search_file, sort_order=hitlist_sort)

        # Cached search: re-derive the number of words per hit from the
        # parsed query so the hitlist can be opened.
        parsed = QuerySyntax.parse_query(qs)
        grouped = QuerySyntax.group_terms(parsed)
        words_per_hit = len(Query.split_terms(grouped))
        return HitList.HitList(search_file, words_per_hit, self, sort_order=hitlist_sort)
コード例 #5
0
    def query(self, qs="", method="", method_arg=0, limit=10000000, **metadata):
        """Query the PhiloLogic database.

        Keyword arguments in `metadata` with non-empty values restrict the
        search to a corpus; `qs` is the search string. Both the corpus and
        the search result are cached as hitlist files under
        ``<self.path>/hitlists/``, named by a running SHA-1 over the
        database path, the metadata filters and, for searches, `qs`,
        `method`, `method_arg` and `limit`.

        Returns:
            - the corpus hitlist when metadata was given and its length is 0;
            - with `qs`: the cached search HitList, or the result of
              Query.query when no cached file exists;
            - without `qs`: the corpus if it is truthy, otherwise
              ``self.get_all("doc")``.
        """
        digest = hashlib.sha1()
        digest.update(self.path)
        corpus = None
        corpus_file = None
        has_metadata = False

        # Wrap bare strings into one-element lists (stored back into
        # `metadata`) and fold every filter that has non-empty terms into
        # the digest.
        for field, terms in metadata.items():
            if isinstance(terms, str) and terms != "":
                terms = [terms]
                metadata[field] = terms
            kept = [term for term in terms if term]
            if not kept:
                continue
            has_metadata = True
            digest.update("%s=%s" % (field, "|".join(kept)))

        if has_metadata:
            corpus_file = self.path + "/hitlists/%s.hitlist" % digest.hexdigest()
            if os.path.isfile(corpus_file):
                corpus = HitList.HitList(corpus_file, 0, self)
                corpus.finish()
            else:
                # Sort each filter into the dictionary of the hierarchy level
                # it belongs to: one dictionary per level.
                hierarchy = self.locals["metadata_hierarchy"]
                per_level = [{} for _ in hierarchy]
                for field, terms in metadata.items():
                    if not terms:
                        continue
                    for depth, level_fields in enumerate(hierarchy):
                        if field not in level_fields:
                            continue
                        per_level[depth][field] = terms
                        field_types = self.locals["metadata_types"]
                        if field in field_types:
                            field_type = field_types[field]
                            if field_type == "div":
                                # "div" stands for all div variants.
                                type_pattern = '"div"|"div1"|"div2"|"div3"'
                            else:
                                type_pattern = '"%s"' % field_type
                            per_level[depth]["philo_type"] = [type_pattern]
                populated = [entry for entry in per_level if entry]
                corpus = MetadataQuery.metadata_query(self, corpus_file, populated)
            if len(corpus) == 0:
                return corpus

        if not qs:
            # No search string: the answer is the corpus itself, or every
            # document when there is no usable corpus.
            if corpus:
                return corpus
            return self.get_all("doc")

        words_per_hit = len(qs.split(" "))
        # The search digest continues from the corpus digest state.
        for piece in (qs, method, str(method_arg), str(limit)):
            digest.update(piece)
        search_file = self.path + "/hitlists/%s.hitlist" % digest.hexdigest()
        if os.path.isfile(search_file):
            return HitList.HitList(search_file, words_per_hit, self)
        return Query.query(self, qs, corpus_file, self.width, method, method_arg, limit,
                           filename=search_file)
コード例 #6
0
ファイル: DB.py プロジェクト: pleonard212/PhiloLogic4
    def query(self, qs="", method="", method_arg=0, limit=10000000, **metadata):
        """Query the PhiloLogic database.

        Keyword arguments in `metadata` with non-empty values restrict the
        search to a corpus; `qs` is the search string. Both the corpus and
        the search result are cached as hitlist files under
        ``<self.path>/hitlists/``, named by a running SHA-1 over the
        database path, the metadata filters and, for searches, `qs`,
        `method`, `method_arg` and `limit`.

        Returns:
            - the corpus hitlist when metadata was given and its length is 0;
            - with `qs`: the cached search HitList, or the result of
              Query.query when no cached file exists;
            - without `qs`: the corpus if it is truthy, otherwise
              ``self.get_all("doc")``.
        """
        digest = hashlib.sha1()
        digest.update(self.path)
        corpus = None
        corpus_file = None
        has_metadata = False

        # Wrap bare strings into one-element lists (stored back into
        # `metadata`) and fold every filter that has non-empty terms into
        # the digest.
        for field, terms in metadata.items():
            if isinstance(terms, str) and terms != "":
                terms = [terms]
                metadata[field] = terms
            kept = [term for term in terms if term]
            if not kept:
                continue
            has_metadata = True
            digest.update("%s=%s" % (field, "|".join(kept)))

        if has_metadata:
            corpus_file = self.path + "/hitlists/%s.hitlist" % digest.hexdigest()
            if os.path.isfile(corpus_file):
                corpus = HitList.HitList(corpus_file, 0, self)
                corpus.finish()
            else:
                # Sort each filter into the dictionary of the hierarchy level
                # it belongs to: one dictionary per level.
                hierarchy = self.locals["metadata_hierarchy"]
                per_level = [{} for _ in hierarchy]
                for field, terms in metadata.items():
                    if not terms:
                        continue
                    for depth, level_fields in enumerate(hierarchy):
                        if field not in level_fields:
                            continue
                        per_level[depth][field] = terms
                        field_types = self.locals["metadata_types"]
                        if field in field_types:
                            field_type = field_types[field]
                            if field_type == "div":
                                # "div" stands for all div variants.
                                type_pattern = '"div"|"div1"|"div2"|"div3"'
                            else:
                                type_pattern = '"%s"' % field_type
                            per_level[depth]["philo_type"] = [type_pattern]
                populated = [entry for entry in per_level if entry]
                if "philo_id" in metadata:
                    # philo_id goes on the last populated level, or on a
                    # level of its own when no other filter matched.
                    if not populated:
                        populated.append({})
                    populated[-1]["philo_id"] = metadata["philo_id"]
                corpus = MetadataQuery.metadata_query(self, corpus_file, populated)
            if len(corpus) == 0:
                return corpus

        if not qs:
            # No search string: the answer is the corpus itself, or every
            # document when there is no usable corpus.
            if corpus:
                return corpus
            return self.get_all("doc")

        # The search digest continues from the corpus digest state.
        for piece in (qs, method, str(method_arg), str(limit)):
            digest.update(piece)
        search_file = self.path + "/hitlists/%s.hitlist" % digest.hexdigest()
        if not os.path.isfile(search_file):
            return Query.query(self, qs, corpus_file, self.width, method, method_arg, limit,
                               filename=search_file)

        # Cached search: re-derive the number of words per hit from the
        # parsed query so the hitlist can be opened.
        term_groups = Query.split_terms(
            QuerySyntax.group_terms(QuerySyntax.parse_query(qs)))
        return HitList.HitList(search_file, len(term_groups), self)