Beispiel #1
0
 def _select(self, params):
     """
     Send a GET request for the ``select`` path and return the response.

     ``params`` is a dict of query parameters and must contain a ``'q'``
     entry. Note that ``params['q']`` is overwritten in place with the
     result of ``unicode_safe`` before the dict is url-encoded.

     Returns whatever ``HTTPConnection.getresponse()`` hands back; the
     connection is not closed here.
     """
     # urlencode needs the query in a form it can handle, so run it
     # through unicode_safe first
     params['q'] = unicode_safe(params['q'])
     query_string = urlencode(params)
     request_path = '%s/select/?%s' % (self.path, query_string)

     connection = HTTPConnection(self.host, self.port)
     connection.request('GET', request_path)
     response = connection.getresponse()
     return response
Beispiel #2
0
 def _select(self, params):
     """
     Query the ``select`` path with ``params`` and hand back the raw
     HTTP response object.

     The ``'q'`` entry of ``params`` is replaced in place by its
     ``unicode_safe`` form, then the whole dict is url-encoded into the
     query string of a GET request against ``self.host:self.port``.
     """
     # make the query digestible for urlencode
     safe_query = unicode_safe(params['q'])
     params['q'] = safe_query

     url = '%s/select/?%s' % (self.path, urlencode(params))
     http = HTTPConnection(self.host, self.port)
     http.request('GET', url)
     return http.getresponse()
Beispiel #3
0
def fetch_article(article,titlefield,linkfield,niceify=False):
    """
    Extract a validated title/link pair from an article mapping.

    article    -- the record to read; anything that is not a dict yields None
    titlefield -- key under which the title is stored in ``article``
    linkfield  -- key under which the link is stored in ``article``
    niceify    -- when truthy, the title is additionally passed through
                  ``niceify_title(title, niceify)`` and the result is printed

    Returns ``dict(title=..., link=...)`` when both keys are present and
    both processed values are truthy, otherwise ``None``.
    """
    # Guard clauses: bail out early on anything we cannot read from.
    if not isinstance(article, dict):
        return None
    if titlefield not in article or linkfield not in article:
        return None

    title = unicode_safe(validate_title(article[titlefield]))
    link = validate_link(article[linkfield])

    if niceify:
        title = niceify_title(title, niceify)
        # Single parenthesised expression: prints identically whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print("Niced title: %s" % title)

    if title and link:
        return dict(title=title, link=link)
    return None
Beispiel #4
0
 def _from_python(value):
     """
     Converts python values to a form suitable for insertion into the xml
     we send to solr.
     """
     if isinstance(value, datetime):
         value = value.strftime('%Y-%m-%dT%H:%M:%S.000Z')
     elif isinstance(value, date):
         value = value.strftime('%Y-%m-%dT00:00:00.000Z')
     elif isinstance(value, bool):
         if value:
             value = 'true'
         else:
             value = 'false'
     else:
         value = unicode_safe(value)
     return value
Beispiel #5
0
 def _from_python(value):
     """
     Converts python values to a form suitable for insertion into the xml
     we send to solr.
     """
     if isinstance(value, datetime):
         value = value.strftime('%Y-%m-%dT%H:%M:%S.000Z')
     elif isinstance(value, date):
         value = value.strftime('%Y-%m-%dT00:00:00.000Z')
     elif isinstance(value, bool):
         if value:
             value = 'true'
         else:
             value = 'false'
     else:
         value = unicode_safe(value)
     return value
Beispiel #6
0
def changed(types=None,since=None,commit=True,optimize=False):
    """
        Re-submit to Solr every Thing that has been created or has
        changed since the last run. Run by `cron` (through `paster
        run`) on a schedule. Things add themselves to a
        `thing_changes` table; we read it, look the Things up,
        tokenise them and send the result to Solr.

        types    -- classes to scan; falls back to `indexed_types`
        since    -- passed to `get_changed` as `min_date`; falls back
                    to `get_last_run()`
        commit, optimize -- forwarded unchanged to `SolrConnection`
    """
    set_emptying_cache()

    # taken before any querying; stored through save_last_run() at the end
    run_started = datetime.now()

    types = types or indexed_types
    since = since or get_last_run()

    to_submit = []

    for cls in types:
        # get_changed rows =:= [(Fullname,Date) | ...]; keep the fullnames
        fullnames = set(row[0]
                        for row in thing_changes.get_changed(cls,min_date = since))
        things = cls._by_fullname(fullnames,
                                  data=True, return_dict=False)

        # note: anything marked as spam or deleted is not updated in
        # the search database. Since these are filtered out in the UI,
        # that's probably fine.
        live = [t for t in things if not (t._spam or t._deleted)]

        if not live:
            print("No changed %ss detected" % (cls.__name__,))
            continue

        docs = tokenize_things(live)
        print("Found %d %ss starting with %s" % (len(docs),cls.__name__,unicode_safe(docs[0]['contents'])))
        to_submit += docs

    with SolrConnection(commit=commit,optimize=optimize) as solr:
        solr.add(to_submit)

    save_last_run(run_started)
Beispiel #7
0
 Thing:
 (Field('fullname',
        '_fullname'), Field('date', '_date', is_date=True, reverse=True),
  Field('lang'), Field('ups', '_ups', is_number=True, reverse=True),
  Field('downs', '_downs', is_number=True,
        reverse=True), Field('spam', '_spam'), Field('deleted', '_deleted'),
  Field('hot', lambda t: t._hot * 1000, is_number=True, reverse=True),
  Field('controversy', '_controversy', is_number=True, reverse=True),
  Field('points',
        lambda t: (t._ups - t._downs),
        is_number=True,
        reverse=True)),
 Subreddit: (
     Field('contents',
           lambda s: ' '.join([
               unicode_safe(s.name),
               unicode_safe(s.title),
               unicode_safe(s.description),
               unicode_safe(s.firsttext)
           ]),
           tokenize=True),
     Field('boost', '_downs'),
     #Field('title'),
     #Field('firsttext'),
     #Field('description'),
     #Field('over_18'),
     #Field('sr_type','type'),
 ),
 Link: (
     Field('contents', 'title', tokenize=True),
     Field(
Beispiel #8
0
 def str_to_python(self, value):
     """
     Return the python form of an 'str' field taken from solr's xml
     format. The conversion itself is delegated to ``unicode_safe``.
     """
     converted = unicode_safe(value)
     return converted
Beispiel #9
0
# discussion of multi-language search. The 'boost' field is a
# solr-magic field that ends up being an attribute on the <doc>
# message (rather than a field), and is used to do an index-time boost
# (this magic is done in pysolr.dor_to_elemtree)
search_fields={Thing:     (Field('fullname', '_fullname'),
                           Field('date', '_date',   is_date = True, reverse=True),
                           Field('lang'),
                           Field('ups',   '_ups',   is_number=True, reverse=True),
                           Field('downs', '_downs', is_number=True, reverse=True),
                           Field('spam','_spam'),
                           Field('deleted','_deleted'),
                           Field('hot', lambda t: t._hot*1000, is_number=True, reverse=True),
                           Field('controversy', '_controversy', is_number=True, reverse=True),
                           Field('points', lambda t: (t._ups - t._downs), is_number=True, reverse=True)),
               Subreddit: (Field('contents',
                                 lambda s: ' '.join([unicode_safe(s.name),
                                                     unicode_safe(s.title),
                                                     unicode_safe(s.description),
                                                     unicode_safe(s.firsttext)]),
                                 tokenize = True),
                           Field('boost', '_downs'),
                           #Field('title'),
                           #Field('firsttext'),
                           #Field('description'),
                           #Field('over_18'),
                           #Field('sr_type','type'),
                           ),
               Link:      (Field('contents','title', tokenize = True),
                           Field('boost', lambda t: int(t._hot*1000),
                                 # yes, it's a copy of 'hot'
                                 is_number=True, reverse=True),
Beispiel #10
0
        return ("<ThingField: (%s,%s,%s,%s)>"
                % (self.name,self.cls,self.id_attr,self.lu_attr_name))


search_fields={Thing:     (Field('fullname', '_fullname'),
                           Field('date', '_date',   is_date = True, reverse=True),
                           Field('lang'),
                           Field('ups',   '_ups',   is_number=True, reverse=True),
                           Field('downs', '_downs', is_number=True, reverse=True),
                           Field('spam','_spam'),
                           Field('deleted','_deleted'),
                           Field('hot', lambda t: t._hot*1000, is_number=True, reverse=True),
                           Field('controversy', '_controversy', is_number=True, reverse=True),
                           Field('points', lambda t: (t._ups - t._downs), is_number=True, reverse=True)),
               Subreddit: (Field('contents',
                                 lambda s: ' '.join([unicode_safe(s.name),
                                                     unicode_safe(s.title),
                                                     unicode_safe(s.description),
                                                     unicode_safe(s.firsttext)]),
                                 tokenize = True),
                           Field('boost', '_downs'),
                           #Field('title'),
                           #Field('firsttext'),
                           #Field('description'),
                           #Field('over_18'),
                           #Field('sr_type','type'),
                           ),
Link:      (Field('contents','title', tokenize = True),
                           Field('boost', lambda t: int(t._hot*1000),
                                 # yes, it's a copy of 'hot'
                                 is_number=True, reverse=True),
Beispiel #11
0
 def str_to_python(self, value):
     """
     Convert an 'str' field from solr's xml format to python.

     ``value`` is passed through ``unicode_safe`` and the outcome is
     returned unchanged.
     """
     result = unicode_safe(value)
     return result