Example #1
0
	def startThreads(self, url, bundle_url):
		startTime = time.time()		
		ADFBundle.grabBundleKeysByURL(bundle_url)

		print "processing... ", url 
		new_agent = HttpAgent(url)
		response = new_agent.RequestResponse()

		#Read only the first 1MB of data
		snippet = response.read(50000)

		soup = BeautifulSoup(snippet)
		links = soup.findAll('a', {"target" : "_source"})

		#Create an instance for each search results
		parse_url = urlparse(response.geturl())
		hostname = parse_url.scheme + '://' + parse_url.netloc 

		counter = 0
		#populate queue with hosts
		for link in links:			
			counter += 1

			try:
				new_searchResult = SearchResult(hostname, link['href'], None)				
				newTags, newAttrs = new_searchResult.exploreSource() #Get the CSet variables		
				
				Explore.allTags = Explore.allTags.union(newTags)
				Explore.allAttrs = Explore.allAttrs.union(newAttrs)
			except:
				print "link unexplored"
				pass

		elapsedTime = (time.time() - startTime)
		print "Elapsed Time: %s" % elapsedTime
Example #2
0
	def run(self):
		while True:
			#grab url from queue
			link = self.queue.get()
			print "link href:", link['href']

			try:
				new_searchResult = SearchResult(self.hostname, link['href'], None)				
				new_searchResult.searchSource() #Get the CSet variables		
				new_searchResult.grabBundleSource(self.mf) #Grab the Bundle Resources	

				#Container
				if self.param['container'] == 'dialog':
					###Search for Dialogs
					self.runDialogSearch(new_searchResult)						

				elif self.param['container'] == 'page':
					#Page
					tagAfKey = "af:panelgrouplayout"
					self.runPageSearch(new_searchResult, tagAfKey)						

				elif self.param['container'] == 'explore':
					#Generic Search
					tagName = None

					if self.param['tag'] != 'All':
						print "tag:", self.param['tag']
						tagName = self.param['tag']

					attr_dict = dict()
					if self.param['attribute'] != 'All':
						print "attributeA:", self.param['attribute']

						if self.param['attribute_value'] is not None:
							print "value:", self.param['attribute_value']

							attr_dict[(self.param['attribute'])] = self.param['attribute_value']
						else:
							print "value: compile"

							attr_dict[(self.param['attribute'])] = re.compile(".+")

						self.runPageSearch(new_searchResult, tagName, attr_dict)
					else:
						self.runPageSearch(new_searchResult, tagName)
				elif self.param['container'] == 'icon':
					#Icons
					self.runIconSearch(new_searchResult)
				
				print "successful"

				#print Output to Files
			except:
				print "unsuccessful"
				pass

			#signals to queue job is done
			self.queue.task_done()
Example #3
0
    def performSearch(self, criteria):
        """Query the on-disk index for ``criteria`` and return the hits.

        The index directory is read from the 'index_path' property of the
        current environment. ``criteria`` is parsed against the "content"
        field and at most 20 hits are requested. Each hit becomes a
        SearchResult built from its title, path and order number plus the
        original criteria string.

        Returns a list of SearchResult objects (empty when nothing matched).
        """
        log = Logger.getInstance()
        log.logDebug('performing search with criteria: ' + criteria)
        settings = Properties.getInstance()

        # open the existing index stored under the configured directory
        index = open_dir(settings.get(self.__environment, 'index_path'))

        found = []
        with index.searcher() as searcher:
            parsed = QueryParser("content", index.schema).parse(criteria)
            for hit in searcher.search(parsed, limit=20):
                order, path, title = hit['orderno'], hit['path'], hit['title']
                found.append(SearchResult(title, path, order, criteria))

        log.logDebug("results found: " + str(len(found)))
        return found
Example #4
0
 def on_searchBtn_clicked(self, dialog):
     """Run the free-text search and show the hits in a result dialog.

     The text currently in ``self.query`` is wrapped in ``%...%`` and
     matched with LIKE against the columns listed in the queries below.
     ``self.mode`` selects the query: 'in' searches hvhnonc_in, 'out'
     searches hvhnonc_out joined to hvhnonc_in, and 'both' is the UNION ALL
     of the two. Rows are ordered by date, newest first.

     Once the result dialog closes, its exec_() return value is passed to
     ``dialog.done()``.

     Raises ValueError if ``self.mode`` is not 'in', 'out' or 'both'.
     """
     #update search cache
     self.update_field_cache()
     # open a search result window
     self.resultWindow = QtWidgets.QDialog()
     Q_IN = (
             'select '
                 'ID, '
                 '"入帳" as type, '
                 'name, '
                 'acquire_date as date, '
                 'keeper, '
                 'remark '
             'from '
                 'hvhnonc_in '
             'where '
                 'name like :q or category like :q '
                 'or subcategory like :q or brand like :q '
                 'or spec like :q or place like :q '
                 'or keep_department like :q or use_department like :q '
                 'or keeper like :q or remark like :q ')
     Q_OUT = (
             'select '
                 'hvhnonc_out.ID as ID, '
                 '"除帳" as type, '
                 'hvhnonc_in.name as name, '
                 'hvhnonc_out.unregister_date as date, '
                 'hvhnonc_in.keeper as keeper, '
                 'hvhnonc_out.unregister_remark as remark '
             'from '
                 'hvhnonc_out '
             'inner join '
                 'hvhnonc_in '
             'on '
                 'hvhnonc_out.in_ID = hvhnonc_in.ID '
                 'and (hvhnonc_in.name like :q '
                 'or hvhnonc_in.category like :q '
                 'or hvhnonc_in.subcategory like :q '
                 'or hvhnonc_in.brand like :q '
                 'or hvhnonc_in.spec like :q '
                 'or hvhnonc_in.place like :q '
                 'or hvhnonc_in.keep_department like :q '
                 'or hvhnonc_in.use_department like :q '
                 'or hvhnonc_in.keeper like :q '
                 'or hvhnonc_in.remark like :q) ')
     Q_BOTH = (Q_IN + 'union all ' + Q_OUT)
     Q_ORDER = 'order by date desc'
     if self.mode == 'in':
         sqlstr = Q_IN + Q_ORDER
     elif self.mode == 'out':
         sqlstr = Q_OUT + Q_ORDER
     elif self.mode == 'both':
         sqlstr = Q_BOTH + Q_ORDER
     else:
         # Previously an unknown mode left sqlstr unbound and surfaced
         # further down as an UnboundLocalError; fail with a clear message.
         raise ValueError('unknown search mode: {!r}'.format(self.mode))
     # One value for the repeated :q placeholder; how it gets bound is up
     # to SearchResult.
     params = ('%{}%'.format(self.query.currentText()),)
     SearchResult(self.resultWindow, sqlstr, params)
     dialog.done(self.resultWindow.exec_())
Example #5
0
 def on_serial_lookup_clicked(self):
     """Show a 320x600 dialog listing hvhnonc_in rows grouped by name.

     Each row carries object_ID, name and the group's row count. The
     result table's doubleClicked signal is disconnected before the dialog
     is executed, so double-clicking a row triggers none of the handlers
     SearchResult may have attached.
     """
     grouped_count_query = ("select object_ID, name, count(*) as '數量' "
                            "from hvhnonc_in group by name")
     self.resultWindow = QtWidgets.QDialog()
     # the query has no placeholders, hence the empty parameter list
     self.sr = SearchResult(self.resultWindow, grouped_count_query, [])
     self.resultWindow.resize(320, 600)
     self.sr.tableWidget.doubleClicked.disconnect()
     self.resultWindow.exec_()
Example #6
0
 def factory(type):
     """Create the search object named by ``type``.

     Recognised names are "SearchNews", "SearchImage" and "SearchResult".
     The matching module is imported only when its name is requested.

     Returns a new instance of the requested class.
     Raises AssertionError for any other name.
     """
     if type == "SearchNews":
         from SearchNews import SearchNews
         return SearchNews()
     if type == "SearchImage":
         from SearchImage import SearchImage
         return SearchImage()
     if type == "SearchResult":
         from SearchResult import SearchResult
         return SearchResult()
     # Raised explicitly rather than via `assert 0`: assert statements are
     # removed under `python -O`, which would make an unknown name silently
     # return None. Exception type and message are unchanged for callers.
     raise AssertionError("Bad shape creation: {}".format(type))
Example #7
0
    def search(self, query, k):
        """Rank every document against ``query`` and return the top ``k``.

        Each distinct query token found in ``self.term_document_weight``
        gets the weight ``tf(count in query) * idf(number of documents,
        number of entries stored for the token)``. A document's score is
        the dot product of those weights with its stored token weights,
        divided by ``query_norm * docs_norm[doc.id]``. A zero denominator
        yields a score of 0.0 instead of a ZeroDivisionError.

        Parameters
        ----------
        query : iterable of hashable tokens
        k : int
            Maximum number of results to return.

        Returns
        -------
        list
            Up to ``k`` SearchResult objects, best first (ordering relies on
            SearchResult's own comparison). When no query token carries any
            weight, the first ``k`` entries of ``self.documents`` are
            returned as-is instead.
        """
        # How often each token occurs in the query.
        query_term_freq = collections.Counter(query)

        # Weight of every query token the index knows about.
        query_weight = {}
        for token, freq in query_term_freq.items():
            if token not in self.term_document_weight:
                continue
            query_weight[token] = TFIDFSearcher.tf(freq) \
                * TFIDFSearcher.idf(len(self.documents), len(self.term_document_weight[token]))

        query_norm = math.sqrt(sum(weight**2 for weight in query_weight.values()))
        if query_norm == 0.0:
            # Nothing to rank by: fall back to the first k documents.
            return self.documents[:k]

        weighted_tokens = set(query_weight)  # loop-invariant, built once
        results = []
        for doc in self.documents:
            working_tokens = set(doc.tokens)
            working_tokens.intersection_update(weighted_tokens)
            score = 0.0
            for token in working_tokens:
                score += query_weight[token] * self.term_document_weight[
                    token][doc.id]
            denominator = query_norm * self.docs_norm[doc.id]
            # A document with a zero norm cannot match anything.
            score = score / denominator if denominator else 0.0
            results.append(SearchResult(doc, score))
            print(doc.name + ' ' + str(score))

        results.sort(reverse=True)
        # Return the SearchResult objects (not the bare documents) so that
        # callers can use the score; max() keeps a negative k meaning "none".
        return results[:max(k, 0)]
Example #8
0
    def startThreads(self, url, bundle_url):
        startTime = time.time()
        ADFBundle.grabBundleKeysByURL(bundle_url)

        print "processing... ", url
        new_agent = HttpAgent(url)
        response = new_agent.RequestResponse()

        #Read only the first 1MB of data
        snippet = response.read(50000)

        soup = BeautifulSoup(snippet)
        links = soup.findAll('a', {"target": "_source"})

        #Create an instance for each search results
        parse_url = urlparse(response.geturl())
        hostname = parse_url.scheme + '://' + parse_url.netloc

        counter = 0
        #populate queue with hosts
        for link in links:
            counter += 1

            try:
                new_searchResult = SearchResult(hostname, link['href'], None)
                newTags, newAttrs = new_searchResult.exploreSource(
                )  #Get the CSet variables

                Explore.allTags = Explore.allTags.union(newTags)
                Explore.allAttrs = Explore.allAttrs.union(newAttrs)
            except:
                print "link unexplored"
                pass

        elapsedTime = (time.time() - startTime)
        print "Elapsed Time: %s" % elapsedTime
Example #9
0
    def retrieve_active_listings(self):
        """Fetch the account's active-listings page and wrap each listing.

        Requests the URL built by ``self.active_listings_url`` for
        ``self._account_name``, parses it with the 'html.parser' backend and
        builds one SearchResult per ``<li class="sresult">`` element.

        Returns a list of SearchResult objects (empty if the page has no
        such elements). Errors raised by ``urlopen`` are not caught here.
        """
        # TODO: Handle multiple pages (Probably visiting all pages, slow ...)
        url = self.active_listings_url(self._account_name)

        # Parse inside the `with` so the HTTP response is always closed,
        # even if parsing raises.
        with urllib.request.urlopen(url) as page:
            soup = BeautifulSoup(page, 'html.parser')

        # Pass each li context to construct a search result object representation
        return [
            SearchResult(listing_context)
            for listing_context in soup.findAll('li', {'class': 'sresult'})
        ]
Example #10
0
    def search(self, query=None):
        """Query the Google Custom Search JSON endpoint and collect the hits.

        If ``query`` is given it replaces ``self.query``; spaces in the
        stored query are then turned into '+'. The request is only sent when
        ``self.api_key`` and ``self.cx_id`` are non-empty and ``query`` was
        not passed as an empty string.

        Returns ``self.data`` with one SearchResult appended per returned
        item, or None when the request is not sent or fails with an
        HTTPError.
        """
        if query is not None:
            self.query = query
        self.query = self.query.replace(' ', '+')

        # NOTE(review): only spaces are escaped; other reserved characters
        # in the query go into the URL unchanged.
        url = "https://www.googleapis.com/customsearch/v1?key=" + self.api_key + "&cx=" + self.cx_id + "&q=" + self.query + "&alt=json"

        # Compare by value: `is not ""` tests object identity and only
        # appeared to work because the interpreter happens to share one
        # empty-string object.
        if self.api_key != "" and self.cx_id != "" and query != "":
            try:
                connection = urllib2.urlopen(url)
                try:
                    res = json.loads(connection.read())
                finally:
                    # Release the socket whether or not decoding succeeded.
                    connection.close()
                # A response without "items" (presumably: no hits - confirm
                # against the API docs) adds nothing instead of raising
                # KeyError.
                for r in res.get("items", []):
                    self.data.append(SearchResult(r))
                return self.data

            except urllib2.HTTPError:
                return None
        return None
Example #11
0
def display(soup):
    """Scrape the recipe cards in ``soup`` and lay all results out in a grid.

    For every ``<article class="fixed-recipe-card">`` the card image is
    downloaded and resized to ``image_size``, and a SearchResult (title,
    stars, review count, image, link, selected=False) is appended to the
    module-level ``results`` list. Then every entry of ``results`` gets a
    150px-wide frame under ``root``, placed five per row starting at grid
    row 2, and the frame is recorded in ``result_containers`` under its
    display row.

    Titles longer than 21 characters are shortened in place to 21
    characters plus "...".
    """
    result_list_row = 2
    display_row = 0
    result_list_column = 0

    for result in soup.find_all("article", class_='fixed-recipe-card'):
        image_url = result.find("img", class_='fixed-recipe-card__img').attrs['data-original-src']
        with urllib.request.urlopen(image_url) as u:
            raw_data = u.read()
        im = Image.open(BytesIO(raw_data))
        # The PhotoImage is stored on the SearchResult; Tk shows an image
        # only while a Python reference to it is kept alive.
        image = ImageTk.PhotoImage(im.resize(image_size))

        results.append(SearchResult(result.find("span", class_='fixed-recipe-card__title-link').string,
                                    result.find("span", class_='stars').attrs['data-ratingstars'],
                                    result.find("span", class_='fixed-recipe-card__reviews').contents[0].attrs[
                                        'number'],
                                    image,
                                    result.find("a", {'data-internal-referrer-link': 'hub recipe'}).attrs['href'],
                                    False))

    for result_count, result in enumerate(results, start=1):
        if len(result.title) > 21:
            result.title = result.title[:21] + "..."

        container = tk.Frame(root, width=150)

        if result.selected:
            # Widget options must go through configure(); assigning
            # `container.background` only sets a Python attribute and
            # leaves the frame's colour untouched.
            container.configure(background="green")

        tk.Message(master=container, text=result.title, bg="blue", fg="white", width=150).pack(fill=tk.BOTH)
        tk.Label(master=container, image=result.image, bg="orange").pack()
        tk.Label(master=container, text=result.stars).pack()
        tk.Label(master=container, text=result.reviews).pack()

        # grid() returns None, so place the frame first and then record the
        # frame itself rather than grid()'s return value.
        container.grid(row=result_list_row, column=result_list_column, padx=3, pady=3)
        result_containers[display_row].append(container)

        result_list_column += 1
        if result_count % 5 == 0:
            # Row is full: continue on the next grid row / display row.
            result_list_row += 1
            display_row += 1
            result_list_column = 0
Example #12
0
def search(params):
    """Performs a search query on AZLyrics and scrapes the search results.

    Parameters
    ----------
    params : dict
        Parameters for the search query; the search text is read from
        ``params['q']`` when reporting an empty result.

    Returns
    -------
    list(SearchResult,)
        A list of search results returned from AZLyrics

    Raises
    ------
    NoResultsError
        If there were no search results returned
    """

    # Get the search result's BeautifulSoup and see if it has any search results
    doc = get_url_soup(AZLYRICS_SEARCH_URL, params)
    if no_results(doc):
        raise NoResultsError(
            'No search results were found for the query: `{}`'.format(
                params['q']))

    # Compile all of the search results into a list of SearchResult instances
    search_results = []
    for result in doc.findAll('td', {'class': 'text-left'}):
        # Grab first link; a cell without one cannot become a result
        anchor = result.find('a')
        if anchor is None:
            continue
        # Remove the leading "<number>. " prefix. The dot is escaped so a
        # title that merely starts with digits is not truncated.
        text = re.sub(r'^\d+\.\s', '', result.text.strip())
        # Collapse runs of spaces, then isolate the first line
        text = re.sub(r' +', ' ', text).split('\n')[0]
        search_results.append(SearchResult(text, anchor['href']))

    return search_results
Example #13
0
    def run(self):
        while True:
            #grab url from queue
            link = self.queue.get()
            print "link href:", link['href']

            try:
                new_searchResult = SearchResult(self.hostname, link['href'],
                                                None)
                new_searchResult.searchSource()  #Get the CSet variables
                new_searchResult.grabBundleSource(
                    self.mf)  #Grab the Bundle Resources

                #Container
                if self.param['container'] == 'dialog':
                    ###Search for Dialogs
                    self.runDialogSearch(new_searchResult)

                elif self.param['container'] == 'page':
                    #Page
                    tagAfKey = "af:panelgrouplayout"
                    self.runPageSearch(new_searchResult, tagAfKey)

                elif self.param['container'] == 'explore':
                    #Generic Search
                    tagName = None

                    if self.param['tag'] != 'All':
                        print "tag:", self.param['tag']
                        tagName = self.param['tag']

                    attr_dict = dict()
                    if self.param['attribute'] != 'All':
                        print "attributeA:", self.param['attribute']

                        if self.param['attribute_value'] is not None:
                            print "value:", self.param['attribute_value']

                            attr_dict[(self.param['attribute']
                                       )] = self.param['attribute_value']
                        else:
                            print "value: compile"

                            attr_dict[(
                                self.param['attribute'])] = re.compile(".+")

                        self.runPageSearch(new_searchResult, tagName,
                                           attr_dict)
                    else:
                        self.runPageSearch(new_searchResult, tagName)
                elif self.param['container'] == 'icon':
                    #Icons
                    self.runIconSearch(new_searchResult)

                print "successful"

                #print Output to Files
            except:
                print "unsuccessful"
                pass

            #signals to queue job is done
            self.queue.task_done()
Example #14
0
    # allow_q = Term("url", "https://smittenkitchen.com/2021/12/short-rib-onion-soup/")
    # # Don't show any documents where the "tag" field contains "todo"
    # restrict_q = query.Term("tag", "todo")

    # Gather hits from one search per ingredient plus the main query into a
    # single set, then print title and URL of every collected hit.
    # (ix, ing_list, parser and myquery are defined outside this fragment.)
    with ix.searcher() as s:

        # Duplicates collapse only as far as SearchResult's own hashing and
        # equality allow - presumably defined on the class; confirm.
        final_results = set()
        if len(ing_list) > 0:
            results = []
            for ingred in ing_list:
                # Each entry is an iterable of strings, joined into one query string
                print(" ".join(ingred))
                # allow_q = Term("ingredients", " ".join(ingred))
                # res = s.search(myquery, filter=allow_q, terms = True, limit = 20) 
                res = s.search(parser.parse(" ".join(ingred)),  terms = True, limit = 20) 
                for rr in res:
                    r = SearchResult(rr)
                    final_results.add(r)
            # NOTE(review): `results` is still the empty list created above,
            # so this always reports 0; the hits went into final_results.
            print("Found " + str(len(results)) + "results")
        
        # Main query, capped at 20 hits like the per-ingredient searches
        results = s.search(myquery, terms = True, limit = 20)  #filter=allow_q, mask=restrict_q
        print("Found " + str(len(results)) + "results")
        print(results.scored_length())
        for rr in results:
            r = SearchResult(rr)
            final_results.add(r)
        # Sets are unordered, so the index printed here is not a ranking
        for i, r in enumerate(final_results):
            print("result found: " + str(i))
            print(r.title)
            print(r.url)
            print("\n")
Example #15
0
 def on_submitBtn_clicked(self, dialog):
     """Run the form's query in a new result dialog, then close ``dialog``.

     The SQL text and its parameters come from ``self.load_form_query()``.
     The value returned by the result dialog's exec_() is passed on to
     ``dialog.done()``.
     """
     # open a search result window
     self.resultWindow = QtWidgets.QDialog()
     query_text, query_params = self.load_form_query()
     SearchResult(self.resultWindow, query_text, query_params)
     exit_code = self.resultWindow.exec_()
     dialog.done(exit_code)
Example #16
0
def convert_results(articles, stemmed):
    """Wrap each article in a SearchResult, passing ``stemmed`` to every one.

    Returns a list with one SearchResult per article, in input order.
    """
    return [
        SearchResult(item, stemmed)
        for item in articles
    ]
#!/usr/bin/env python
from SearchResult import SearchResult
from PyDictionary import PyDictionary
thesaurus = PyDictionary()

# Count, line by line, how often each term and each of its synonyms
# appears in the book, then print one summary line per term.
terms = [
    'red',
    'blue',
    'yellow'
]
searchResults = [SearchResult(term) for term in terms]

# `with` closes the book once the scan is done (it used to stay open until
# interpreter exit).
with open('sense-and-sensibility.txt') as book:
    for line in book:
        for term in searchResults:
            # Plain substring test: at most one increment per line for the
            # term itself, however many times it occurs in that line.
            if term.term in line:
                term.increment()

            for synonym in term.synonyms:
                if synonym in line:
                    term.increment(synonym)

for term in searchResults:
    print('There are {} occurrences of synonyms of {} (using {} as synonyms)'.format(term.count, term.term, term.synonyms))