def get_keyword_relevancy_map(self, name_list, n_list, terms, sortcontext, enginetype='BING', license=None):
    """
    Rank *terms* by search-engine relevancy within *sortcontext*.

    Uses pattern.web's sort() algorithm:
    http://www.clips.ua.ac.be/pages/pattern-web#sort

    Parameters:
        name_list   -- stored on the instance for later use
        n_list      -- currently unused; kept for interface compatibility
        terms       -- list of search terms to rank
        sortcontext -- context string the terms are ranked against
        enginetype  -- search service name, e.g. 'BING', 'GOOGLE', 'YAHOO'
        license     -- API license key for the chosen service (may be None)

    Returns:
        dict with 'data' (list of (percent-string, term) tuples, most
        relevant first) and 'summary' (always None).
    """
    results_list = []
    self.name_list = name_list
    results = sort(
        terms=terms,           # BUG FIX: was hard-coded to [] — the terms argument was ignored.
        context=sortcontext,   # Term used for sorting.
        service=enginetype,    # GOOGLE, YAHOO, BING, ...
        license=license,       # BUG FIX: was hard-coded to None — the caller's key was ignored.
        strict=True,           # Wraps the query in quotes, i.e. 'mac sweet'.
        reverse=True,          # Reverses term and context: 'sweet mac' <=> 'mac sweet'.
        cached=True)
    for weight, term in results:
        # BUG FIX: the original appended to `results` (the list being
        # iterated) and passed two arguments to list.append (a TypeError).
        results_list.append(("%5.2f" % (weight * 100) + "%", term))
    return {
        'data': results_list,
        'summary': None
    }
import os, sys; sys.path.insert(0, os.path.join("..", "..")) from pattern.web import GOOGLE, YAHOO, BING, sort # The pattern.web module includes an interesting sort() algorithm. # Ir classifies search terms according to a search engine's total results count. # When a context is defined, it sorts according to relevancy to the context: # sort(terms=["black", "green", "red"], context="Darth Vader") => # yields "black" as the best candidate, # because "black Darth Vader" yields more search results. results = sort( terms = [ "arnold schwarzenegger", "chuck norris", "dolph lundgren", "steven seagal", "sylvester stallone", "mickey mouse", ], context = "dangerous", # Term used for sorting. service = BING, # GOOGLE, YAHOO, BING, ... license = None, # You should supply your own API license key for the given service. strict = True, # Wraps the query in quotes, i.e. 'mac sweet'. reverse = True, # Reverses term and context: 'sweet mac' instead of 'mac sweet'. cached = True) for weight, term in results: print "%5.2f" % (weight * 100) + "%", term
with open(fname) as f: diction = f.readlines() for term in diction: if len(term) > lengthmin: subset.append(term.strip('\n')) # function to get a random term from the minlength dictionary in subset list def rando(listofterms, num): i = 0 while i < num: randomed = random.choice(listofterms) #print randomed searchlist.append(randomed) i = i + 1 return searchlist = [ ] # the list of terms that will be generated in the rando function # setup the default search terms rando( subset, numterms ) # get total list of terms based on numterms set in the globals section above from pattern.web import sort results = sort(terms=searchlist, context=contexter, prefix=True) for weight, term in results: print "%.2f" % (weight * 100) + '%', term exit()
sys.path.insert(0, os.path.join("..", "..")) from pattern.web import GOOGLE, YAHOO, BING, sort # The pattern.web module includes an interesting sort() algorithm. # Ir classifies search terms according to a search engine's total results count. # When a context is defined, it sorts according to relevancy to the context: # sort(terms=["black", "green", "red"], context="Darth Vader") => # yields "black" as the best candidate, # because "black Darth Vader" yields more search results. results = sort( terms=[ "arnold schwarzenegger", "chuck norris", "dolph lundgren", "steven seagal", "sylvester stallone", "mickey mouse", ], context="dangerous", # Term used for sorting. service=BING, # GOOGLE, YAHOO, BING, ... license= None, # You should supply your own API license key for the given service. strict=True, # Wraps the query in quotes, i.e. 'mac sweet'. reverse= True, # Reverses term and context: 'sweet mac' instead of 'mac sweet'. cached=True) for weight, term in results: print "%5.2f" % (weight * 100) + "%", term
from pattern.web import sort results = sort(terms=[ 'sunset', 'puppies', 'kittens', 'babies', 'hedgehogs', 'birthday', 'bunnies' ], context='joy', prefix=True) for weight, term in results: print "%.2f" % (weight * 100) + '%', term
diction = [] subset = [] lengthmin = 6 numterms = 10 fname = 'assets/dictionary-list.html' with open(fname) as f: diction = f.readlines() for term in diction: if len(term) > lengthmin: subset.append(term.strip('\n')) # function to get a random term from the minlength dictionary in subset list def rando(listofterms,num): i = 0 while i < num: randomed = random.choice(listofterms) #print randomed searchlist.append(randomed) i = i + 1 return searchlist = [] # the list of terms that will be generated in the rando function # setup the default search terms rando(subset,numterms) # get total list of terms based on numterms set in the globals section above from pattern.web import sort results = sort(terms=searchlist,context=contexter,prefix=True) for weight, term in results: print "%.2f" % (weight * 100) + '%', term exit()
from pattern.web import Google, plaintext, sort from optparse import OptionParser parser = OptionParser() parser.add_option("-q", "--query", dest="query", help="txt file with searchwords") parser.add_option("-f", "--filename", dest="filename", help="specify filename to save data too") parser.add_option("-c", "--context", dest="context", help="specify context for search") (options, args) = parser.parse_args() # open the list of query words f = codecs.open(options.query, encoding='utf-8') # build results in context of the "Energiewende" results = sort(terms=[x.strip() for x in f], context = options.context) # create weights if results.count() > 0: for weight, term in results: #term = term.encode('utf-8') <-- needed? with codecs.open(options.filename, "a", "utf-8") as f: try: f.write(term.encode("utf-8")) f.write(',') f.write("%5.2f" % (weight * 100)) f.write('\n') except UnicodeDecodeError as e: print e else: print "No results. Sorry."