Python hasWebsite Examples

Programming Language: Python

Namespace/Package Name: queryLog

Method/Function: hasWebsite

Examples at hotexamples.com: 2

Python hasWebsite - 2 examples found. These are the top-rated real-world Python examples of queryLog.hasWebsite, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: qeMarkov.py Project: vmanisha/QueryExpansion

def updateNetwork(query, network, qp, searcher, tlc, field, ntype):
  #find the top 50 documents
  q = qp.parse(unicode(query))
  totalText = ''
  total = 0.0
  tmin = -1000
  tmax = 1000
  terms = set()
  try:
    searcher.search_with_collector(q, tlc)
  except TimeLimit:
    print '--LONG-- ', query

  results = tlc.results()
  for entry in results:
    totalText += entry[field] + ' '

  finder = BigramCollocationFinder.from_words(word_tokenize(totalText))
  #update the network

  rList = finder.score_ngrams(biMeas.pmi)

  for rTuple in rList:
    total += rTuple[1]
    if tmin > rTuple[1]:
      tmin = rTuple[1]
    if tmax < rTuple[1]:
      tmax = rTuple[1]

  for rTuple in sorted(rList, reverse=True, key=lambda x: x[1]):
    if (len(terms) < 3000 and finder.ngram_fd[rTuple[0]] > 2
     ) or (finder.ngram_fd[rTuple[0]] > 1.0 and rTuple[0][0] in query or
           rTuple[0][1] in query and len(terms) < 4000):
      #if (len(terms) < 3000  and finder.ngram_fd[rTuple[0]] > 2) or (rTuple[0][0] in query or rTuple[0][1] in query and len(terms) < 4000):
      a = rTuple[0][0]
      if len(a) > 2 and hasAlpha(a) and a not in stopSet and not hasWebsite(a):
        if a not in network:
          network[a] = {}
          terms.add(a)
        b = rTuple[0][1]
        if len(b) > 2 and hasAlpha(b) and b not in stopSet and not hasWebsite(
            b):
          if b not in network[a]:
            network[a][b] = {}
            terms.add(b)
          network[a][b][ntype] = network[a][b].setdefault(ntype, 0.0) + (
              (rTuple[1] - tmin) / (tmax - tmin))

  print query, ntype, len(terms)

  return terms

Example #2

Show file

File: randomWalk.py Project: vmanisha/QueryExpansion

  def updateNetworkFromText(self, query, text, ntype):

    total = 0.0
    tmin = -1000
    tmax = 1000

    qsplit = query.split()
    for entry in qsplit:
      term = self.porter.stem(entry)
      self.network[term] = {}
      self.terms.add(term)

    finder = BigramCollocationFinder.from_words(word_tokenize(text))
    #update the network

    rList = finder.score_ngrams(self.biMeas.pmi)
    for rTuple in rList:
      total += rTuple[1]
      if tmin > rTuple[1]:
        tmin = rTuple[1]
      if tmax < rTuple[1]:
        tmax = rTuple[1]

    for rTuple in sorted(rList, reverse=True, key=lambda x: x[1]):
      if (len(self.terms) < 1000  and finder.ngram_fd[rTuple[0]] > 2) or \
			((finder.ngram_fd[rTuple[0]] > 1.0 and rTuple[0][0] in query) or \
			 (rTuple[0][1] in query and len(self.terms) < 1500)):
        noSymbA = SYMBreg.sub('', rTuple[0][0])
        noSymbB = SYMBreg.sub('', rTuple[0][1])

        if noSymbA not in stopSet and noSymbB not in stopSet:
          a = self.porter.stem(noSymbA)
          b = self.porter.stem(noSymbB)
          if len(a) > 2 and hasAlpha(a) and a not in stopSet and not hasWebsite(a) \
					and len(b) > 2 and hasAlpha(b) and b not in stopSet and not hasWebsite(b):
            if a not in self.network:
              self.network[a] = {}
              self.terms.add(a)
            if b not in self.network[a]:
              self.network[a][b] = {}
              self.terms.add(b)
            self.network[a][b][ntype] = self.network[a][b].setdefault(
                ntype, 0.0) + ((rTuple[1] - tmin) / (tmax - tmin))

    print query, ntype, len(self.terms)