def lucid_training_docs(query, num_records=1000):
    """Fetch training pages from Solr whose 'content' field matches *query*.

    Multi-word queries are AND-ed together, one 'content:<term>' clause per
    whitespace-separated term; a single-word query becomes one clause.

    :param query: whitespace-separated search terms.
    :param num_records: maximum number of documents to request from Solr.
    :returns: list of UTF-8 encoded page bodies, one per matching document
              (the first value of each doc's multi-valued 'content' field).
    """
    terms = query.split()
    if len(terms) == 1:  # single term
        term_query = 'content:' + query
    else:
        term_query = ' AND '.join(['content:' + t for t in terms])

    # NOTE(review): collection name is hard-coded to a dated crawl snapshot.
    collection = 'fashion_crawl_try_20131015'
    field_string = ','.join(('content', 'score'))

    config = Config()
    config.http_debug = True
    solr = Solr(config)
    json_response = solr.query_solr(
        collection=collection,
        query=term_query,
        field_string=field_string,
        start=0,
        rows=num_records)
    response = simplejson.loads(json_response)

    # Original code evaluated response['response']['numFound'] and discarded
    # it (a no-op statement); removed as dead code.
    docs = response['response']['docs']
    # Each doc's 'content' is multi-valued; take the first value and encode.
    return [doc['content'][0].encode('utf-8') for doc in docs]
collection = 'neiman_marcus_raw_catalog' field_string = ','.join( ( 'cmos_item_code', 'long_desc_text_only', 'detail_bullets' ) ) delim = '\t' multi_value_delim = '.' escape_newlines = False ################################# config = Config() config.http_debug = True solr = Solr(config) worked = solr.query_to_file( out_file_path=out_file_path, query_string=query_string, max_records=max_records, collection=collection, field_string=field_string, delim=delim, multi_value_delim=multi_value_delim, escape_newlines=escape_newlines, ) if worked: print out_file_path + ' Done!' else: