def run(self):
    """
    Drops every index: the per-wiki collections first, then the global
    collection, then the shared page/topic/user collections.
    """
    global_index = solr.global_collection()

    # Each document id in the global collection is used to look up one
    # per-wiki collection; drop those before the global collection itself.
    for wiki_doc in solr.get_all_docs_by_query(global_index, '*:*', fields='id'):
        wiki_index = solr.collection_for_wiki(wiki_doc['id'])
        wiki_index.drop()
    global_index.drop()

    # Shared collections, dropped in the same order as before.
    all_pages = solr.all_pages_collection()
    all_pages.drop()
    all_topics = solr.all_topics_collection()
    all_topics.drop()
    all_user_pages = solr.all_user_pages_collection()
    all_user_pages.drop()
    wiki_users = solr.wiki_user_collection()
    wiki_users.drop()
    users = solr.user_collection()
    users.drop()
def get_row(self):
    """
    Returns the ``attr_entities`` value of the first user document whose
    ``name_txt_en`` field matches ``self.user_name``

    :return: row data, or None when the query yields no documents
    :rtype: dict
    """
    user_index = solr.user_collection()
    # NOTE(review): the user name is interpolated into the query as-is;
    # confirm callers never pass names containing double quotes.
    name_query = 'name_txt_en:"%s"' % self.user_name
    for match in solr.get_all_docs_by_query(user_index, name_query):
        # Only the first hit is of interest.
        return match['attr_entities']
    return None
def get_row(self):
    """
    Returns the first document of the user collection, restricted to
    ``self.fields`` plus ``attr_entities``

    :return: row data, or None when the collection yields no documents
    :rtype: dict
    """
    user_index = solr.user_collection()
    wanted_fields = self.fields + ['attr_entities']
    field_list = ','.join(wanted_fields)
    for first_doc in solr.get_all_docs_by_query(user_index, "*:*", fields=field_list):
        # Only the first hit is of interest.
        return first_doc
    return None
def run(self):
    """
    Optimizes every index, printing a progress label before each one:
    the global collection, then each per-wiki collection, then the shared
    page/topic/user collections.
    """
    global_index = solr.global_collection()
    print('global')
    global_index.optimize()

    # Each document id in the global collection is used to look up one
    # per-wiki collection.
    for wiki_doc in solr.get_all_docs_by_query(global_index, '*:*', fields='id'):
        wiki_id = wiki_doc['id']
        print(wiki_id)
        solr.collection_for_wiki(wiki_id).optimize()

    print('all pages')
    all_pages = solr.all_pages_collection()
    all_pages.optimize()

    print('all topics')
    all_topics = solr.all_topics_collection()
    all_topics.optimize()

    print('all user pages')
    all_user_pages = solr.all_user_pages_collection()
    all_user_pages.optimize()

    print('wiki user')
    wiki_users = solr.wiki_user_collection()
    wiki_users.optimize()

    print('user')
    users = solr.user_collection()
    users.optimize()
def get_users(self, limit=10, offset=0, **kwargs):
    """
    Gets users for a given topic, boosted by ``scaled_authority_f``

    :param limit: the number of users we want
    :type limit: int
    :param offset: offset
    :type offset: int
    :param kwargs: extra query options, forwarded after passing through
                   ``sans_q`` (presumably strips the ``q`` key -- confirm)
    :return: a list of objects related to authors
    :rtype: list
    """
    user_index = solr.user_collection()
    topic_query = self.topic
    field_list = ','.join(UserModel.fields)
    extra_options = sans_q(kwargs)
    return solr.get_docs_by_query_with_limit(
        user_index,
        topic_query,
        limit=limit,
        offset=offset,
        boost='scaled_authority_f',
        fields=field_list,
        **extra_options
    )
def get_users(self, limit=10, offset=0, for_api=False):
    """
    Gets users for a given topic, boosted by ``scaled_authority_f``

    :param limit: the number of users we want
    :type limit: int
    :param offset: offset
    :type offset: int
    :param for_api: accepted for interface compatibility; this method
                    does not currently read it
    :type for_api: bool
    :return: a list of objects related to authors
    :rtype: list
    """
    user_index = solr.user_collection()
    topic_query = self.topic
    return solr.get_docs_by_query_with_limit(
        user_index,
        topic_query,
        limit=limit,
        offset=offset,
        boost='scaled_authority_f'
    )
def analyze_users_globally():
    """
    Aggregates the per-wiki user documents into one document per user and
    writes the result to the user collection.

    For every document in the wiki-user collection, the user id (the part
    of the document id after the last underscore) selects an aggregate
    document. Entities, wiki ids, wiki names and page authorities are
    accumulated onto it. Each user's authorities are then summed, the sums
    are scaled with ``MinMaxScaler``, and all aggregates are added to the
    user collection and committed.
    """
    print("Analyzing Users...")

    user_collection = solr.existing_collection(solr.user_collection())
    wiki_user_collection = solr.wiki_user_collection()

    id_to_docs = {}
    for user_doc in solr.get_all_docs_by_query(wiki_user_collection, '*:*'):
        # these are gonna be wiki-id_user-id; keep only the user id
        doc_id = user_doc['id'].rsplit('_', 1)[-1]
        if doc_id not in id_to_docs:
            # NOTE(review): the {'set': ...} wrappers look like Solr
            # atomic-update modifiers -- confirm against the client in use.
            id_to_docs[doc_id] = dict(id=doc_id,
                                      attr_entities={'set': []},
                                      name_s={'set': user_doc['name_s']},
                                      name_txt_en={'set': user_doc['name_txt_en']},
                                      wikis_is={'set': []},
                                      attr_wikis={'set': []},
                                      authorities_fs={'set': []},
                                      total_authority_f={'set': 0},
                                      scaled_authority_f={'set': 0})

        aggregate = id_to_docs[doc_id]
        try:
            # extend() instead of map(append, ...): map() must not be used
            # for side effects, and does nothing at all where it is lazy.
            aggregate['attr_entities']['set'].extend(user_doc['attr_entities'])
            aggregate['wikis_is']['set'].append(user_doc['wiki_id_i'])
            aggregate['attr_wikis']['set'].append(user_doc['wiki_name_txt'])
            aggregate['authorities_fs']['set'].append(user_doc['total_page_authority_f'])
        except KeyError:
            # Best effort: a wiki-user document missing one of these fields
            # contributes whatever was appended before the missing key.
            pass

    id_to_total_authorities = {uid: sum(doc['authorities_fs']['set'])
                               for uid, doc in id_to_docs.items()}

    # Materialize the totals so the scaler gets a real list on any Python.
    user_scaler = MinMaxScaler(list(id_to_total_authorities.values()))
    for uid, total_authority in id_to_total_authorities.items():
        id_to_docs[uid]['total_authority_f']['set'] = total_authority
        id_to_docs[uid]['scaled_authority_f']['set'] = user_scaler.scale(total_authority)

    user_collection.add(list(id_to_docs.values()))
    user_collection.commit()
def search(**kwargs):
    """
    Searches the user collection, boosted by ``scaled_authority_f``

    :param kwargs: must contain ``q``, the query; the remaining options
                   are forwarded after passing through ``sans_q``
                   (presumably strips the ``q`` key -- confirm)
    :return: whatever ``solr.get_docs_by_query_with_limit`` returns
    :raises KeyError: when ``q`` is not supplied
    """
    user_index = solr.user_collection()
    query = kwargs['q']
    field_list = ','.join(UserModel.fields)
    extra_options = sans_q(kwargs)
    return solr.get_docs_by_query_with_limit(
        user_index,
        query,
        boost='scaled_authority_f',
        fields=field_list,
        **extra_options
    )