Example #1
0
def lucid_training_docs(query, num_records=1000):
    """Fetch the text of documents whose ``content`` field matches *query*.

    The query is split on whitespace and every term is required: each term
    becomes a ``content:<term>`` clause and the clauses are joined with
    ``AND``.  The search runs against the hard-coded
    ``fashion_crawl_try_20131015`` collection, starting at offset 0.

    Args:
        query: Whitespace-separated search terms.
        num_records: Number of rows requested from Solr (default 1000).

    Returns:
        A list holding, for every returned document, the first value of its
        ``content`` field encoded as UTF-8.
    """
    # Always build the clauses from the split terms.  The former single-term
    # shortcut concatenated the raw string, so ' dress ' produced
    # 'content: dress ' with stray whitespace inside the clause.
    term_query = ' AND '.join('content:' + term for term in query.split())

    # Alternate collection (disabled): 'crawl_from_long_fashion_blogs_list'
    collection = 'fashion_crawl_try_20131015'
    field_string = ','.join(('content', 'score'))

    config = Config()
    config.http_debug = True
    solr = Solr(config)

    json_response = solr.query_solr(
        collection=collection,
        query=term_query,
        field_string=field_string,
        start=0,
        rows=num_records)

    # query_solr hands back raw JSON text; the hits live under
    # response -> docs.
    docs = simplejson.loads(json_response)['response']['docs']

    # 'content' is indexed as a sequence; only its first value is used.
    return [doc['content'][0].encode('utf-8') for doc in docs]
Example #2
0
# Export settings: source collection, the fields to dump, and how values are
# separated in the output file.
collection = 'neiman_marcus_raw_catalog'
field_string = 'cmos_item_code,long_desc_text_only,detail_bullets'
delim, multi_value_delim = '\t', '.'
escape_newlines = False
#################################

# Build the Solr client from a Config with http_debug switched on.
config = Config()
config.http_debug = True
solr = Solr(config)

# out_file_path, query_string and max_records are not assigned in this part
# of the script -- presumably set earlier; verify before running.
# The result is kept in `worked` for the status check that follows.
worked = solr.query_to_file(
    collection=collection,
    query_string=query_string,
    field_string=field_string,
    max_records=max_records,
    out_file_path=out_file_path,
    delim=delim,
    multi_value_delim=multi_value_delim,
    escape_newlines=escape_newlines,
)

if worked:
	print out_file_path + ' Done!'
else: