Example 1
    def search(self, query_list, fields=None):
        # NOTE: the fields argument is currently ignored; the searched
        # field list is fixed below.

        with self.ix.searcher() as searcher:

            # Preserve the boolean operators; lowercase everything else.
            query_list2 = []
            for qq in query_list:
                if qq in ('AND', 'OR'):
                    query_list2.append(qq)
                else:
                    query_list2.append(qq.lower())
            query_string = " ".join(query_list2)

            if ":" in query_string:
                # If the user DOES specify a field, setting the fields
                # determines what fields are searched with the free
                # (unfielded) terms.
                fields = ['title', 'content', 'owner_name', 'owner_email', 'github_user']
            else:
                # If the user does not specify a field, these are the
                # fields that are actually searched.
                fields = ['url', 'title', 'content', 'owner_name', 'owner_email', 'github_user']

            parser = MultifieldParser(fields, schema=self.ix.schema)
            # Base relative dates on the current Eastern time; the original
            # est.localize(datetime.utcnow()) mislabeled UTC clock time as EST.
            est = pytz.timezone('America/New_York')
            parser.add_plugin(DateParserPlugin(free=True, basedate=datetime.now(est)))
            parser.add_plugin(GtLtPlugin())
            try:
                query = parser.parse(query_string)
            except Exception:
                # DateParserPlugin can choke on unquoted field values;
                # quote them and retry.
                query_string2 = re.sub(r':(\w+)', r":'\g<1>'", query_string)
                try:
                    query = parser.parse(query_string2)
                except Exception:
                    print("parsing query %s failed" % query_string)
                    print("parsing query %s also failed" % query_string2)
                    query = parser.parse('')

            parsed_query = "%s" % query
            print("query: %s" % parsed_query)
            results = searcher.search(query, terms=False, scored=True, groupedby="kind")
            search_result = self.create_search_result(results)

        return parsed_query, search_result
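
None of these excerpts is runnable on its own, so here is a minimal self-contained sketch of what GtLtPlugin contributes in all of them: it rewrites field:>value (and >=, <, <=) into a Whoosh range query. The schema and documents are invented for illustration:

from whoosh.fields import Schema, TEXT, NUMERIC
from whoosh.filedb.filestore import RamStorage
from whoosh.qparser import QueryParser, GtLtPlugin

# Throw-away in-memory index with a numeric field to compare against.
schema = Schema(name=TEXT(stored=True), size=NUMERIC(stored=True))
ix = RamStorage().create_index(schema)
with ix.writer() as w:
    w.add_document(name=u"small", size=10)
    w.add_document(name=u"large", size=900)

parser = QueryParser("name", schema=ix.schema)
parser.add_plugin(GtLtPlugin())
query = parser.parse(u"size:>100")  # parsed into a numeric range query
with ix.searcher() as s:
    print([hit["name"] for hit in s.search(query)])  # expected: ['large']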
Example 2
async def search(query_str, ctx):
    # Needs: from whoosh import highlight, scoring
    #        from whoosh.index import open_dir
    #        from whoosh import qparser
    #        from whoosh.qparser import QueryParser, GtLtPlugin
    #        from whoosh.qparser.dateparse import DateParserPlugin
    #        import discord
    # DiscordBoldFormatter is sketched below this example.
    ix = open_dir("indexdir")
    parser = QueryParser("content", ix.schema)
    parser.add_plugin(qparser.FuzzyTermPlugin())   # enables term~ fuzzy syntax
    parser.add_plugin(GtLtPlugin())                # enables field:>value
    parser.add_plugin(DateParserPlugin())          # enables date terms/ranges
    query = parser.parse(query_str)
    print(query)
    with ix.searcher(weighting=scoring.PL2) as searcher:
        results = searcher.search(query, limit=5)
        results.fragmenter = highlight.SentenceFragmenter()
        # Note: 'surround' is a ContextFragmenter option and has no
        # effect on SentenceFragmenter.
        results.fragmenter.surround = 50
        results.fragmenter.maxchars = 10000
        results.formatter = DiscordBoldFormatter()
        embed = discord.Embed(
            title="Results",
            color=discord.Color(0x3cd63d),
            description="From search: **{}**".format(query_str))
        for hit in results:
            # Discord does not render a markdown link in a field *name*,
            # so the name is a zero-width space and the link goes in the
            # value instead.
            embed.add_field(name="\u200b",
                            value=f"[{hit['title']}]({hit['url']})\n"
                                  f"{hit.highlights('content', minscore=0)}",
                            inline=False)
    await ctx.send(embed=embed)
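
DiscordBoldFormatter above is not a Whoosh class; it comes from the surrounding bot project. Following the custom-formatter pattern from the Whoosh highlighting docs, a minimal version might look like this (a sketch, not the project's actual class):

from whoosh.highlight import Formatter, get_text

class DiscordBoldFormatter(Formatter):
    """Wrap each matched term in Discord's **bold** markup."""

    def format_token(self, text, token, replace=False):
        return "**%s**" % get_text(text, token, replace)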
Example 3
def searchFe(busqueda):
    ix = open_dir("index")
    # The searcher is left open on purpose: the returned Results object
    # reads from it lazily.
    searcher = ix.searcher()
    # Open-ended date range, e.g. busqueda = "20200101" -> "{20200101 to]".
    date_range = "{" + busqueda + " to]"
    parser = QueryParser("fecha", ix.schema)

    parser.add_plugin(DateParserPlugin(free=True))
    parser.add_plugin(GtLtPlugin())
    myquery = parser.parse(date_range)

    results = searcher.search(myquery)

    return results
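
The string built above is Whoosh range syntax: with busqueda = "20200101" it becomes {20200101 to], an open-ended range where the curly brace makes the lower bound exclusive and the missing upper term means "no upper bound", i.e. everything strictly after that date. DateParserPlugin(free=True) is what lets free-form date text be understood when parsed against the fecha field.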
Example 4
    def __init__(self, fieldname):
        '''
        Build a Whoosh parser for `fieldname`. SimpleParser returns a
        QueryParser with a deliberately minimal plugin set, so the
        add_plugin calls below re-enable fielded terms, boolean
        operators, phrases, grouping, prefix queries and ranges.
        '''
        self.w_parser = SimpleParser(fieldname, None)
        self.w_parser.add_plugin(FieldsPlugin())
        self.w_parser.add_plugin(OperatorsPlugin())
        self.w_parser.add_plugin(PhrasePlugin())
        self.w_parser.add_plugin(SingleQuotePlugin())
        self.w_parser.add_plugin(GroupPlugin())
        self.w_parser.add_plugin(PrefixPlugin())
        self.w_parser.add_plugin(GtLtPlugin())
        self.w_parser.add_plugin(RangePlugin())
        self.query = None
        self.current_node_stack = []
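
Worth noting: the Whoosh docs state that GtLtPlugin requires FieldsPlugin and RangePlugin to work, since it rewrites field:>x into a range query on that field; that is presumably why this example re-adds all three to the stripped-down parser SimpleParser returns. A hypothetical round-trip with a similar setup (no schema, so field names are not validated):

from whoosh.qparser import (SimpleParser, FieldsPlugin, OperatorsPlugin,
                            GtLtPlugin, RangePlugin)

p = SimpleParser("content", None)
for plugin in (FieldsPlugin(), OperatorsPlugin(), GtLtPlugin(), RangePlugin()):
    p.add_plugin(plugin)
# size:>10 is rewritten into an open-ended range on the 'size' field.
print(p.parse(u"size:>10 AND hello"))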
Example 5
def search_langs(repos, q, limit=1000, **kw):
    index_ = get_langs_index(repos)
    qp = QueryParser("ini", schema=index_.schema)
    qp.add_plugin(GtLtPlugin())
    # Append any keyword filters as fielded, quoted clauses,
    # e.g. level='language' -> level:"language".
    q = '{0} {1}'.format(
        q, ' '.join('{0}:"{1}"'.format(k, v) for k, v in kw.items()))

    def highlight(res):
        # Return the first line of the top highlight; the '[[' marker is
        # presumably emitted by the project's BracketFormatter.
        hl = res.highlights('ini', top=1)
        if hl:
            for line in hl.split('\n'):
                if '[[' in line:
                    return line.strip()

    with index_.searcher() as searcher:
        results = searcher.search(qp.parse(q), limit=limit)
        results.formatter = BracketFormatter()
        return (len(results), [
            Languoid(r['id'], r.get('iso'), r['name'], r['level'], r['fname'],
                     highlight(r)) for r in results
        ])
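
A hypothetical call, reusing field names visible in the Languoid tuple above: search_langs(repos, 'name:>k', level='language') would send the query string 'name:>k level:"language"' through the parser, with GtLtPlugin turning the first clause into an open-ended range on name and the second clause acting as an exact filter.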
Example 6
    def __init__(self, location):
        """
        create a new redis store, the location given will be used to generate keys

        this keys will be combined to get/set instance config

        Args:
            location (Location)
        """
        super().__init__(location, Serializer())
        config = self.config_env.get_store_config("whoosh")
        self.base_index_path = config["path"]

        self.schema = self.get_schema()
        self.index = self.get_index(self.schema)

        self.default_plugins = [
            FuzzyTermPlugin(), GtLtPlugin(),
            PhrasePlugin()
        ]
        self.default_pagenum = 1
        self.default_pagelen = 20
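
These defaults map directly onto query syntax and paging: FuzzyTermPlugin enables term~ and term~2 edit-distance matching, GtLtPlugin enables field:>value comparisons, and PhrasePlugin keeps "quoted phrase" support (it is already in QueryParser's default plugin set, so listing it only matters if the parser is built from a reduced set). default_pagenum and default_pagelen presumably feed Whoosh's searcher.search_page(query, pagenum, pagelen=...) elsewhere in the class.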
Example 7
    def search(self, terms, limit=100, time_slice=None, saveAs="search"):

        big_tables = {}
        for i in CATS:
            big_tables[i]=[]

        f = open("./visualization/"+saveAs+"_results.html", "w+")
        master_str = "<!DOCTYPE html><html><style>hr {border: 4;width: 80%;}</style>"+ "<title>Search Results [term(s): "+terms+"]</title><body><br>"

        ix = index.open_dir("twitter_index", indexname="TWTTR")
        # Only searching here, so use the index directly; opening a
        # writer would needlessly take the index write lock.
        qp = QueryParser("content", schema=ix.schema)
        qp.add_plugin(DateParserPlugin())
        qp.add_plugin(GtLtPlugin())
        q = qp.parse(terms)
        print("search terms", q)
        list_IDs = []
        with ix.searcher() as s:
            results = s.search(q, limit=limit)
            if time_slice is not None:
                within = []
                # "HH:MM" -> integer HHMM, e.g. "22:30" -> 2230.
                start = int("".join(time_slice[0].split(":")))
                end = int("".join(time_slice[1].split(":")))
                if (0 <= start <= 2400) and (0 <= end <= 2400):
                    for res in results:
                        time = res["posted"]
                        t = time.hour * 100 + time.minute
                        if start < end and start <= t <= end:
                            # Normal window, e.g. 09:00-17:00.
                            within.append(res)
                        elif end < start and (start <= t or t <= end):
                            # Window wraps past midnight, e.g. 22:00-06:00.
                            within.append(res)

                    results = within
                else:
                    print("Invalid time slice, no results returned.")
                    results = []
            print("%d search results" % len(results))
            print("--"*15)
            for res in results:
                list_IDs.append(int(res["tweet_id"]))
                self.to_nums(res["liwc"], big_tables)
                master_str += self.to_html(res, True)

            master_str += "</body></html>"
            try:
                f.write(master_str)
            except UnicodeEncodeError:
                f.write("Unicode parsing error")
            finally:
                f.close()

            res_str = "<!DOCTYPE html><html><title>LIWC statistics for term(s): "+terms+"</title><body><br>"
            res_str += "<table><tr>"+("<th>Category&nbsp;</th><th>Average</th><th>Std Dev</th><th>Max&nbsp;</th><th>Min&nbsp;</th>"*3)+"</tr>"
            count = 0
            for_later = {}
            for j in list(big_tables.keys()):
                vals = big_tables[j]
                outputs = []
                if len(vals) != 0:
                    # Mean, population standard deviation, max and min of
                    # each LIWC category across the matched tweets.
                    avg = sum(vals)/float(len(vals))
                    outputs.append(round(avg, 4))
                    var = [(i-avg)**2 for i in vals]
                    std = math.sqrt(sum(var)/len(var))
                    outputs.append(round(std, 4))
                    outputs.append(round(max(vals), 4))
                    outputs.append(round(min(vals), 4))
                else:
                    outputs = ["NA", "NA", "NA", "NA"]
                # Three categories per table row.
                if count % 3 == 0:
                    res_str += "<tr>"
                res_str += "<td>"+str(j)+"</td><td>"+str(outputs[0])+"</td><td>"+str(outputs[1])
                res_str += "</td><td>"+str(outputs[2])+"</td><td>"+str(outputs[3])+"</td>"
                count += 1
                if count % 3 == 0:
                    res_str += "</tr>"
                for_later[j] = outputs

            res_str += "</table>"

            if big_tables["WC"] == []:
                big_tables = ""
                res_str = "<!DOCTYPE html><html><title>LIWC statistics for term(s): "
                res_str += terms+"</title><body><br>"
                res_str += "<p>No matches found </p></body></html>"

            t = open("./visualization/"+saveAs+"_averages.html", "w+")
            try:
                t.write(res_str)
            except UnicodeEncodeError:
                t.write("Unicode Error")
            finally:
                t.close()

            self.graph_Tweets(results, saveAs)

        return res_str, for_later, master_str, list_IDs
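
A hypothetical call, using the posted field the results clearly store: self.search("posted:>'1 january 2020' election", limit=200, time_slice=("22:00", "06:00"), saveAs="election") would combine a GtLtPlugin date comparison (the quoted date handled by DateParserPlugin) with the wrap-past-midnight time window implemented above.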