def getSearchDocumentFields(self, valuesCache, name):
    """
    Returns a list of search-fields (GAE search API) for this bone.

    :param valuesCache: dict-like cache holding this bone's current value
        under *name*.  With self.languages set the value must itself be a
        dict mapping language -> text.
    :param name: name of this bone inside the skeleton / valuesCache.
    :returns: list of search.HtmlField or search.TextField instances
        (one per language if self.languages is set, otherwise exactly one);
        an empty list when the value is unset.
    """
    if valuesCache.get(name) is None:
        # Robustness fix: when adding an entry using a subskel the value may
        # never have been set; previously this crashed instead of indexing
        # nothing (the prefix-aware variant of this method already guards).
        return []
    # validHtml bones are indexed as HTML, everything else as plain text.
    fieldClass = search.HtmlField if self.validHtml else search.TextField
    if self.languages:
        assert isinstance(valuesCache[name], dict), \
            "The value shall already contain a dict, something is wrong here."
        return [fieldClass(name=name,
                           value=unicode(valuesCache[name].get(lang, "")),
                           language=lang)
                for lang in self.languages]
    return [fieldClass(name=name, value=unicode(valuesCache[name]))]
def _document_from_sparql_result(sparql_result):
    """Build a search.Document from one SPARQL result row.

    'label' is mandatory; 'htmlDescription' and 'logo' are optional and are
    indexed under 'description' and 'logo' respectively.  The document id
    comes from 'sepakeuri' and the rank from the 'rank' value.
    """
    fields = [search.HtmlField(name='label', value=sparql_result['label'])]
    # Optional result keys: (source key, indexed field name, field class).
    optional_specs = (
        ('htmlDescription', 'description', search.HtmlField),
        ('logo', 'logo', search.AtomField),
    )
    for source_key, field_name, field_cls in optional_specs:
        if source_key in sparql_result:
            fields.append(field_cls(name=field_name,
                                    value=sparql_result[source_key]))
    return search.Document(doc_id=sparql_result['sepakeuri'],
                           fields=fields,
                           rank=sparql_result['rank'].value)
def get_field(self, key, value):
    """Return the GAE search field object for *key* carrying *value*.

    The field class is chosen via self.mapping[key] ('TEXTFIELD',
    'ATOMFIELD', 'NUMBERFIELD', 'HTMLFIELD', 'DATEFIELD', 'GEOFIELD').
    DATEFIELD values are millisecond timestamps; negative timestamps are
    mirrored to positive before conversion to a datetime.

    :returns: a search.*Field instance, or None when the key is unmapped,
        maps to a falsy value, or maps to an unknown type name.
    """
    # Simple one-to-one mappings from type name to field class.
    simple_fields = {
        'TEXTFIELD': search.TextField,
        'ATOMFIELD': search.AtomField,
        'NUMBERFIELD': search.NumberField,
        'HTMLFIELD': search.HtmlField,
        'GEOFIELD': search.GeoField,
    }
    try:
        field_type = self.mapping[key]
    except KeyError as key_error:
        print(key_error)
        return None
    if not field_type:
        return None
    if field_type in simple_fields:
        return simple_fields[field_type](name=key, value=value)
    if field_type == 'DATEFIELD':
        # Value is a millisecond timestamp; abs() mirrors negatives,
        # matching the original `ts if ts > 0 else -ts` behaviour.
        timestamp = abs(value)
        return search.DateField(name=key,
                                value=datetime.fromtimestamp(timestamp / 1000.0))
    return None
def post(self):
    """Create a new article from the posted form, index it and redirect home.

    Side effects: stores an `article` entity, puts a matching document into
    the "Articles" search index and invalidates the cached homepage.
    """
    headline = self.request.get("headline")
    content = self.request.get("content")
    author = self.request.get("author")
    sideheadline = self.request.get("sideheadline")
    featured = int(self.request.get("featured"))
    tags = self.request.get("tags").split(',')
    if self.request.get('picture'):
        # Rewrite the image URL so a scaled-down rendition is served.
        piclink = self.request.get('picture')
        picture = piclink.replace('upload',
                                  "upload/c_scale,h_900,q_auto:good,w_1600")
    else:
        picture = "/images/default.jpg"
    a = article(headline=headline, tags=tags, content=content, author=author,
                picture=picture, sideheadline=sideheadline, featured=featured)
    key = a.put()
    article_id = key.id()
    fields = [search.TextField(name="headline", value=headline),
              search.TextField(name="sideheadline", value=sideheadline),
              search.TextField(name="tags", value=",".join(tags)),
              search.HtmlField(name="content", value=content),
              search.TextField(name="author", value=author),
              search.DateField(name="dateCreated",
                               value=datetime.datetime.now().date())]
    doc = search.Document(doc_id=str(article_id), fields=fields)
    memcache.delete(key='homepage')
    try:
        search.Index(name="Articles").put(doc)
        # Wait a small duration so that memcache is cleared before reuse.
        time.sleep(0.1)
        self.redirect('/')
    except search.Error:
        # BUG FIX: was `self.request.out.write(...)` - the webapp request
        # object has no `.out`; the output stream belongs to the response.
        self.response.out.write("adding to index failed")
def index_artifact(index_, id_, fields):
    """Index one artifact document into *index_*.

    :param index_: a search.Index whose name ends in a 4-digit rollover
        suffix (e.g. "artifacts0003").
    :param id_: doc_id for the document.
    :param fields: flat list of triples - fields[i] is one of the
        ATOMFIELD/TEXTFIELD/HTMLFIELD/NUMBERFIELD/DATEFIELD/GEOPOINTFIELD
        constants, fields[i+1] the field name and fields[i+2] its value.

    On a put failure (e.g. the index ran out of space) we roll over to a
    fresh index with an incremented suffix and retry, up to 3 times, after
    which the search.Error is re-raised.
    """
    # Map field-type constants onto the GAE search field classes.
    field_classes = {
        ATOMFIELD: search.AtomField,
        TEXTFIELD: search.TextField,
        HTMLFIELD: search.HtmlField,
        NUMBERFIELD: search.NumberField,
        DATEFIELD: search.DateField,
        GEOPOINTFIELD: search.GeoField,
    }
    f = []
    for i in xrange(0, len(fields), 3):
        field_class = field_classes.get(fields[i])
        if field_class is not None:
            f.append(field_class(name=fields[i + 1], value=fields[i + 2]))
    doc = search.Document(doc_id=id_, fields=f)
    retry_count = 0
    while True:
        try:
            index_.put(doc)
            break
        except search.Error as e:
            if retry_count < 3:
                log.warning(
                    'Error put doc into index, could be out of space. Creating new index'
                )
                # BUG FIX: the suffix was previously rebuilt WITHOUT
                # incrementing (str(int(name[-4:])).zfill(4)), so the "new"
                # index had exactly the same name as the failing one.
                next_suffix = str(int(index_.name[-4:]) + 1).zfill(4)
                index_ = search.Index(index_.name[:-4] + next_suffix,
                                      namespace=index_.namespace)
                retry_count += 1
            else:
                raise e
def create_searchable_docs(root, file_path, locales=None):
    """Build search.Document objects for the content parsed from *file_path*.

    :param root: pod/site root passed through to _get_fields_from_file.
    :param file_path: path of the content file to parse.
    :param locales: optional locale filter passed through.
    :returns: list of search.Document, one per parsed field set.
    :raises: re-raises any indexing error after logging the offending fields.
    """
    searchable_docs = []
    fields_list = _get_fields_from_file(root, file_path, locales=locales)
    for field_names_to_values in fields_list:
        parsed_fields = dict(field_names_to_values)
        try:
            language = parsed_fields['language']
            fields = [
                search.AtomField(name='locale', value=parsed_fields['locale']),
                search.AtomField(
                    name='path',
                    value=(parsed_fields.get('permalink_path')
                           or parsed_fields.get('doc_id')),
                    language=language),
                search.TextField(name='title', value=parsed_fields['title'],
                                 language=language),
                search.HtmlField(name='html', value=parsed_fields['html'],
                                 language=language),
            ]
            existing_fields = ['locale', 'path', 'title', 'html']
            for name, value in field_names_to_values:
                # BUG FIX: `existing_fields` was computed but never used, so
                # locale/title/html were appended a second time as plain
                # TextFields; skip names already indexed above.
                if name in existing_fields:
                    continue
                fields.append(search.TextField(name=name, value=value,
                                               language=language))
            doc = search.Document(doc_id=parsed_fields['doc_id'], fields=fields)
            searchable_docs.append(doc)
        except Exception:
            logging.error('Error indexing doc -> {}'.format(field_names_to_values))
            raise
    return searchable_docs
def _indexIssue(issue, updates):
    """Does the actual work of indexing the given issue.

    We expect to be called in a deferred handler.  If *updates* is empty the
    issue and its updates are re-fetched via getIssue(); all update comments
    are joined with an <hr /> separator into one HTML "comments" field.
    """
    fields = [
        search.TextField(name="summary", value=issue.summary),
        search.TextField(name="description", value=issue.description),
        search.AtomField(name="id", value=str(issue.key().id_or_name())),
        search.AtomField(name="type", value=issue.type),
        search.NumberField(name="priority", value=issue.priority),
        search.AtomField(name="state", value=issue.state),
        search.AtomField(name="resolution", value=issue.resolution),
    ]
    if not updates:
        # Called without updates: reload the issue together with them.
        issue, updates = getIssue(issue.key().id())
    comments = "\r\n<hr />\r\n".join(
        update.comment for update in updates if update.comment)
    fields.append(search.HtmlField(name="comments", value=comments))
    search.Index(name="issue").put(
        search.Document(doc_id=str(issue.key()), fields=fields))
def after_insert(self, fields, id): if DEBUG: print 'after insert', fields, id fieldlist = [] for f in self.fieldnames: #fieldlist.append(name=fieldname, fields[fieldname]), #Proposed mapping of fieldtypes to search types is as follows: #string - textfield #test - html field #datetime & datetime to date fields #list:string woudl be added later and would need to iterate through if self.table[f].type == 'string': fieldvalue = search.TextField(name=f, value=fields[f]) fieldlist.append(fieldvalue) elif self.table[f].type == 'list:string': for listvalue in fields[f]: if listvalue: fieldvalue = search.TextField(name=f, value=listvalue) fieldlist.append(fieldvalue) elif self.table[f].type == 'date' or self.table[ f].type == 'datetime': fieldvalue = search.DateField(name=f, value=fields[f]) fieldlist.append(fieldvalue) else: #should be text fieldvalue = search.HtmlField(name=f, value=fields[f]) fieldlist.append(fieldvalue) strid = self.table._tablename + '.' + str(id) my_document = search.Document(doc_id=strid, fields=fieldlist) add_result = self.index.put(my_document) if DEBUG: pass return True
def _indexForumThread(forum_thread, new_forum_post = None):
    """Does the actual work of indexing the given forum thread.

    We expect to be called in a deferred handler.  *new_forum_post*, if
    given, is a post that may not yet be visible to the datastore query; its
    content is prepended unless it already appears among the query results.
    """
    forum = forum_thread.forum
    alliance_value = forum.alliance.replace(":", "_") if forum.alliance else "NA"
    fields = [
        search.TextField(name="subject", value=forum_thread.subject),
        search.DateField(name="posted", value=forum_thread.posted),
        search.DateField(name="last_post", value=forum_thread.last_post),
        search.AtomField(name="forum", value=forum.slug.replace(":", "_")),
        search.AtomField(name="alliance", value=alliance_value),
    ]
    content = ""
    posts = model.forum.ForumPost.all().ancestor(forum_thread).order("posted")
    for forum_post in posts:
        if new_forum_post and str(forum_post.key()) == str(new_forum_post.key()):
            # The new post is already in the query results; don't prepend it.
            new_forum_post = None
        content += "\r\n<hr />\r\n" + forum_post.content
    if new_forum_post:
        content = new_forum_post.content + content
    fields.append(search.HtmlField(name="content", value=content))
    search.Index(name="forum").put(
        search.Document(doc_id=str(forum_thread.key()), fields=fields))
def IndexPages(title, idpage, summary, content):
    """Build the search document for one page.

    *idpage* doubles as the doc_id and as a searchable atom field.
    """
    page_fields = [
        search.TextField(name='title', value=title),
        search.AtomField(name='idpage', value=idpage),
        search.TextField(name='summary', value=summary),
        search.HtmlField(name='content', value=content),
    ]
    return search.Document(doc_id=idpage, fields=page_fields)
def addpost():
    """Seed the "Post" search index with two sample (Chinese) HTML documents."""
    sample_contents = [
        u"核心提示:到底是什么导致全球变暖?英国科学家们给出了一个“匪夷所思”的答案:生活在中生代的食草类恐龙排出大量甲烷气体,是全球变暖的重要因素。这一研究结果于近期发表在《当代生物学》杂志上",
        u"核心提示:据媒体调查称,2009年住建部曾对全国城市饮用水水质状况做普查,但至今未公布数据结果。多位业内人士称,此次检测结果实际合格率仅50%左右。调查显示全国城市供水管网质量普遍低劣,不符国标的灰口铸铁管占一半。目前,北京上海等大城市饮用水仍无法直饮。",
    ]
    for text in sample_contents:
        document = search.Document(
            fields=[search.HtmlField(name="content", value=text)])
        search.Index(name="Post").put(document)
    return "ok"
def search_document(self):
    """Return this entity's search.Document.

    doc_id is the datastore key; name/price/description are indexed as
    text/number/html fields respectively.
    """
    name_field = search.TextField(name='name', value=self.name)
    price_field = search.NumberField(name='price', value=self.price)
    description_field = search.HtmlField(name='description',
                                         value=self.description)
    return search.Document(
        doc_id=str(self.key()),
        fields=[name_field, price_field, description_field])
def CreateDocument(author, path, path_link, content):
    """Create the search document for one page, dated today.

    *path* serves both as the doc_id and as a searchable text field.
    """
    today = datetime.now().date()
    page_fields = [
        search.TextField(name='author', value=author),
        search.TextField(name='path', value=path),
        search.TextField(name='path_link', value=path_link),
        search.HtmlField(name='content', value=content),
        search.DateField(name='date', value=today),
    ]
    return search.Document(doc_id=path, fields=page_fields)
def index_quote(self, quote):
    """Index a single quote into the "quotes" search index.

    Stores the owning user/book/file datastore keys as text fields plus the
    quote's HTML body; the quote's own key becomes the doc_id.
    """
    document = search.Document(
        doc_id=str(quote.key()),
        fields=[
            search.TextField(name="user", value=str(quote.user.key())),
            search.TextField(name="book", value=str(quote.epub.key())),
            search.TextField(name="file", value=str(quote.file.key())),
            search.HtmlField(name="html", value=quote.html),
        ])
    # Index.add() is the deprecated pre-1.7 API name; put() is the supported
    # replacement and is what the rest of this codebase uses.
    search.Index("quotes").put(document)
def getSearchDocumentFields(self, valuesCache, name, prefix = ""): """ Returns a list of search-fields (GAE search API) for this bone. """ if valuesCache.get(name) is None: # If adding an entry using an subskel, our value might not have been set return [] if self.languages: assert isinstance(valuesCache[name], dict), "The value shall already contain a dict, something is wrong here." if self.validHtml: return [search.HtmlField(name=prefix + name, value=unicode(valuesCache[name].get(lang, "")), language=lang) for lang in self.languages] else: return [search.TextField(name=prefix + name, value=unicode(valuesCache[name].get(lang, "")), language=lang) for lang in self.languages] else: if self.validHtml: return [search.HtmlField(name=prefix + name, value=unicode(valuesCache[name]))] else: return [search.TextField(name=prefix + name, value=unicode(valuesCache[name]))]
def to_search_document(self):
    """Build the search document for this entity.

    Tags are joined into one space-separated text field; the location is
    indexed as a GeoField.  doc_id is the urlsafe datastore key.
    """
    geo_point = search.GeoPoint(self.location.lat, self.location.lon)
    document_fields = [
        search.TextField(name='tags', value=" ".join(self.tags)),
        search.TextField(name='title', value=self.title),
        search.HtmlField(name='description', value=self.description),
        search.GeoField(name='location', value=geo_point),
    ]
    return search.Document(doc_id=self.key.urlsafe(), fields=document_fields)
def getSearchDocument(self):
    """Return the search document for this entity.

    TODO: add a date-added field, e.g.
    search.DateField(name='birthday', value=datetime(year=1960, month=6, day=19)).
    """
    document_fields = [
        search.TextField(name='title', value=self.title),
        search.HtmlField(name='description', value=self.bodyHtml),
        search.TextField(name='tags', value=self.tags),
        search.TextField(name='product_type', value=self.productType),
    ]
    return search.Document(doc_id=str(self.key.id()), fields=document_fields)
def create_document(algorithm_id, algorithm_summary, display_name, link_url):
    """creates a search.Document.

    :param algorithm_id: also used as the doc_id
    :param algorithm_summary: HTML
    :param display_name:
    :param link_url:
    :rtype : google.appengine.api.search.Document
    """
    document_fields = [
        search.TextField(name='algorithmId', value=algorithm_id),
        search.HtmlField(name='algorithmSummary', value=algorithm_summary),
        search.TextField(name='displayName', value=display_name),
        search.TextField(name='linkURL', value=link_url),
        search.DateField(name='date', value=datetime.now()),
    ]
    return search.Document(doc_id=algorithm_id, fields=document_fields)
def create_test_documents_list(data_list, documents, length):
    """Prepare test documents.

    Appends one search.Document for each of the first *length* entries of
    *data_list* to *documents* (mutated in place; nothing is returned).
    """
    for i in range(length):
        entry = data_list[i]
        documents.append(search.Document(
            doc_id=entry['algorithmId'],
            fields=[
                search.TextField(name='algorithmId',
                                 value=entry['algorithmId']),
                search.HtmlField(name='algorithmSummary',
                                 value=entry['algorithmSummary']),
                search.TextField(name='displayName',
                                 value=entry['displayName']),
                search.TextField(name='linkURL', value=entry['linkURL']),
                search.DateField(name='date', value=datetime.now()),
            ]))
def create_document():
    """Return a fully populated example search document.

    Demonstrates every field type the search API offers (text, html, number,
    date, geo) under the fixed doc_id 'PA6-5000'.
    """
    # Setting the doc_id is optional.  If omitted, the search service will
    # create an identifier.
    example_fields = [
        search.TextField(name='customer', value='Joe Jackson'),
        search.HtmlField(name='comment',
                         value='this is <em>marked up</em> text'),
        search.NumberField(name='number_of_visits', value=7),
        search.DateField(name='last_visit', value=datetime.now()),
        search.DateField(name='birthday',
                         value=datetime(year=1960, month=6, day=19)),
        search.GeoField(name='home_location',
                        value=search.GeoPoint(37.619, -122.37)),
    ]
    return search.Document(doc_id='PA6-5000', fields=example_fields)
def put_into(self):
    """(Re-)index this post and persist the entity.

    Any existing search document with the same id is deleted first, then a
    fresh document (title / content / create_date) is put into the index,
    and finally the entity itself is saved.
    """
    doc_id = str(self.post_id)
    if Index.get(doc_id=doc_id):
        Index.delete([doc_id])
    # create_date is stored as a unix timestamp on the entity.
    created = datetime.date.fromtimestamp(self.create_date)
    Index.put(search.Document(
        doc_id=doc_id,
        fields=[
            search.TextField(name="title", value=self.title),
            search.HtmlField(name="content", value=self.content),
            search.DateField(name="create_date", value=created),
        ]))
    self.put()
def addpost(post):
    """Index a blog *post* into the "article_index" search index.

    Documents are tagged with language 'zh'; tags are joined into one
    space-separated text field and the author's nickname is an atom field.
    """
    post_fields = [
        search.TextField(name='title', value=post.title),
        search.TextField(name='tags', value=' '.join(post.tags)),
        search.HtmlField(name='content', value=post.content),
        search.AtomField(name='author', value=post.author.nickname()),
        search.DateField(name='published', value=post.date),
    ]
    document = search.Document(doc_id=str(post.key.id()),
                               language='zh',
                               fields=post_fields)
    search.Index(name="article_index").put(document)
def _create_doc(self, report):
    """Build the search document for one *report* entity.

    The three date_* attributes become DateFields, the HTML body an
    HtmlField and everything else TextFields; doc_id is the report's
    datastore id.
    """
    # (field class, indexed name, value) in the exact order they are indexed.
    field_specs = [
        (search.TextField, 'report_type', report.report_type),
        (search.TextField, 'thread_id', report.thread_id),
        (search.TextField, 'history_id', report.history_id),
        (search.DateField, 'date_received', report.date_received),
        (search.DateField, 'date_reported', report.date_reported),
        (search.DateField, 'date_responded', report.date_responded),
        (search.TextField, 'has_responded', str(report.has_responded)),
        (search.TextField, 'status', report.status),
        (search.TextField, 'sender', str(report.sender)),
        (search.TextField, 'reported_by', report.reported_by),
        (search.TextField, 'subject', report.subject),
        (search.HtmlField, 'html', report.html),
        (search.TextField, 'text', report.text),
    ]
    doc_fields = [cls(name=name, value=value)
                  for cls, name, value in field_specs]
    return search.Document(doc_id=str(report.key.id()), fields=doc_fields)
def save_activity_search_document(a):
    """Index activity *a* into the "activities" search index.

    Indexing failures are logged (with traceback) rather than raised, so a
    search outage never blocks saving the activity itself.
    """
    author_email = a.actor.get().user.get().primary_email
    document = search.Document(
        doc_id=a.key.urlsafe(),
        fields=[
            search.HtmlField(name='content', value=a.object_.content),
            search.DateField(name='published', value=a.published.date()),
            search.AtomField(name='visibility', value=a.access.visibility),
            search.AtomField(
                name='restricted',
                value="yes" if a.access.domain_restricted else "no"),
            search.AtomField(name='community', value=a.access.community_name),
            search.AtomField(name='provider', value=a.provider),
            search.AtomField(name='verb', value=a.verb),
            search.AtomField(name='author', value=author_email),
            search.AtomField(name='google_id', value=a.key.id()),
        ])
    try:
        search.Index(name="activities").put(document)
    except search.Error:
        logging.exception('PUT of Activity Document FAILED')
def index_epub(self, epub, index_name, user=None):
    """Index every content file of *epub* into the index named *index_name*.

    For each content (X)HTML file one document is created carrying the owner
    list, the book key, the file name and the file's HTML text.  When a
    document for the same internal file already exists, its previous owners
    are merged with *user* (owner entries joined with "|\\n|"); with no user
    at all the owners field is "public".
    """
    index = search.Index(index_name)
    for internal in epub.internals():
        if not internal.isContentFile():
            continue
        logging.info("Indexing " + internal.path)
        internal_id = str(internal.key())
        # Merge owners from any previously indexed version of this file.
        existing = index.list_documents(internal_id, limit=1)
        for document in existing:
            if document.doc_id != internal_id:
                continue
            for field in document.fields:
                if field.name == "owners" and field.value is not None:
                    if user is None:
                        # No new owner supplied: keep the stored owner list.
                        # (Previously field.value.find(None) raised TypeError.)
                        user = field.value
                    elif field.value.find(user) == -1:
                        # BUG FIX: original read `else + "|\n|" + user`, which
                        # applied unary plus to a string (TypeError) and lost
                        # field.value; prepend the stored owners instead.
                        user = field.value + "|\n|" + user
        document = search.Document(
            doc_id=internal_id,
            fields=[
                search.TextField(name="owners",
                                 value="public" if user is None else user),
                search.TextField(name="book", value=str(epub.key())),
                search.TextField(name="name", value=internal.name),
                search.HtmlField(name="html", value=internal.text),
            ])
        # put() replaces the deprecated Index.add().
        index.put(document)
def after_update(self, queryset, fields): return True # not using this for current app as causes more harm than good if DEBUG: print 'after update', queryset, fields for id in self.get_ids(queryset): fieldlist = [] for f in self.fieldnames: update = False if f in fields and fields[f]: #fieldlist.append(name=fieldname, fields[fieldname]), #this is a bit crude - setting everything as htmal field regardless of type #may need some sort of extra field value on table to set the types you want to do this properly if self.table[f].type == 'string': fieldvalue = search.TextField(name=f, value=fields[f]) fieldlist.append(fieldvalue) elif self.table[f].type == 'list:string': for listvalue in fields[f]: if listvalue: fieldvalue = search.TextField(name=f, value=listvalue) fieldlist.append(fieldvalue) elif (self.table[f].type == 'date' or self.table[f].type == 'datetime'): fieldvalue = search.DateField(name=f, value=fields[f]) fieldlist.append(fieldvalue) else: #should be text fieldvalue = search.HtmlField(name=f, value=fields[f]) fieldlist.append(fieldvalue) update = True if update: strid = self.table._tablename + '.' + str(id) my_document = search.Document(doc_id=strid, fields=fieldlist) add_result = self.index.put(my_document) if DEBUG: pass return True