# Excerpt from a Django management command's handle(); the surrounding
# module (not shown) presumably imports os, time, urllib, urllib2,
# django's conf, django.core.management.base as mb, and defines
# CSV_FILE and load_solr().
def handle(self, *file_or_urls, **options):
    new = options.get('new')
    if new:
        # Wipe the index: delete every document, committing immediately.
        data = '<delete><query>*:*</query></delete>'
        r = urllib2.Request(conf.settings.SOLR_URL + 'update?commit=true')
        r.add_header('Content-Type', 'text/xml')
        r.add_data(data)
        f = urllib2.urlopen(r)
        print "Solr response to deletion request:"
        print f.read()
    if file_or_urls:
        parser = options.get('parser')
        module = None
        if parser:
            if parser.endswith('.py'):
                parser = parser[:-3]
            module = __import__('kochief.discovery.parsers.' + parser,
                    globals(), locals(), [parser])
        for file_or_url in file_or_urls:
            if not module:
                # guess parser based on file extension
                if file_or_url.endswith('.mrc'):
                    import kochief.discovery.parsers.marc as module
            if not module:
                raise mb.CommandError("Please specify a parser.")
            print "Converting %s to CSV ..." % file_or_url
            t1 = time.time()
            data_handle = urllib.urlopen(file_or_url)
            # Open before the try block so finally always has a handle to close.
            csv_handle = open(CSV_FILE, 'w')
            try:
                record_count = module.write_csv(data_handle, csv_handle,
                        collections=options.get('collections'))
            finally:
                csv_handle.close()
            t2 = time.time()
            load_solr(CSV_FILE)
            t3 = time.time()
            os.remove(CSV_FILE)
            p_time = (t2 - t1) / 60
            l_time = (t3 - t2) / 60
            t_time = p_time + l_time
            rate = record_count / (t3 - t1)
            print """Processing took %0.3f minutes.
Loading took %0.3f minutes.
That's %0.3f minutes total for %d records,
at a rate of %0.3f records per second.
""" % (p_time, l_time, t_time, record_count, rate)
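The parser modules loaded above are expected to expose a write_csv(data_handle, csv_handle, collections=None) function that returns the number of records written; handle() relies on that return value for its timing stats. A minimal sketch of that contract follows; the column layout is a placeholder, not the real mapping done by kochief.discovery.parsers.marc.

# Sketch of the parser-module interface assumed by handle().
# The fields written here are hypothetical; a real parser maps
# bibliographic records to the CSV columns load_solr() expects.
import csv

def write_csv(data_handle, csv_handle, collections=None):
    writer = csv.writer(csv_handle)
    record_count = 0
    for raw_record in data_handle:      # stand-in for real record iteration
        writer.writerow([raw_record.strip(), collections or ''])
        record_count += 1
    return record_count                 # handle() divides by elapsed time for its rate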
def handle(self, *file_or_urls, **options):
    new = options.get('new')
    if new:
        # create/replace index
        pass
    if file_or_urls:
        parser = options.get('parser')
        module = None
        if parser:
            if parser.endswith('.py'):
                parser = parser[:-3]
            module = __import__('kochief.discovery.parsers.' + parser,
                    globals(), locals(), [parser])
        for file_or_url in file_or_urls:
            if not module:
                # guess parser based on file extension
                if file_or_url.endswith('.mrc'):
                    import kochief.discovery.parsers.marc as module
            if not module:
                raise mb.CommandError("Please specify a parser.")
            print "Converting %s to CSV ..." % file_or_url
            t1 = time.time()
            data_handle = urllib.urlopen(file_or_url)
            # Open before the try block so finally always has a handle to close.
            csv_handle = open(CSV_FILE, 'w')
            try:
                record_count = module.write_csv(data_handle, csv_handle)
            finally:
                csv_handle.close()
            t2 = time.time()
            load_solr(CSV_FILE)
            t3 = time.time()
            os.remove(CSV_FILE)
            p_time = (t2 - t1) / 60
            l_time = (t3 - t2) / 60
            t_time = p_time + l_time
            rate = record_count / (t3 - t1)
            print """Processing took %0.3f minutes.
Loading took %0.3f minutes.
That's %0.3f minutes total for %d records,
at a rate of %0.3f records per second.
""" % (p_time, l_time, t_time, record_count, rate)
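load_solr() is called in every revision but defined in none of these excerpts. A plausible minimal version follows, assuming Solr's CSV update handler is mounted at update/csv under SOLR_URL; the handler path, content type, and commit parameter are guesses, not confirmed by the code above.

# Guess at load_solr(): stream the generated CSV to Solr's CSV update
# handler. The path and parameters here are assumptions.
def load_solr(csv_file):
    data = open(csv_file).read()
    r = urllib2.Request(conf.settings.SOLR_URL + 'update/csv?commit=true')
    r.add_header('Content-Type', 'text/csv; charset=utf-8')
    r.add_data(data)
    print urllib2.urlopen(r).read()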
def handle(self, *file_or_urls, **options):
    print 'options, ```%s```' % options
    new = options.get('new')
    delete_rec = options.get('delete_rec')
    expired = options.get('expired')
    not_updated = options.get('not_updated')
    delete_set_url = options.get('delete_set')
    optimize = options.get('optimize')
    if delete_rec:
        data = '<delete><query>id:%s</query></delete>' % delete_rec
        r = urllib2.Request(conf.settings.SOLR_URL + 'update?commit=true')
        r.add_header('Content-Type', 'text/xml')
        r.add_data(data)
        f = urllib2.urlopen(r)
        print "Solr response to deletion request for rec with id: %s" % delete_rec
        print f.read()
    if delete_set_url:
        # The URL points at a Solr result set; delete every doc id it lists.
        file_handler = urllib2.urlopen(delete_set_url.strip('"'))
        delete_set_response = json.loads(file_handler.read())
        delete_set = []
        solr_data = ""
        for doc in delete_set_response['response']['docs']:
            delete_set.append(doc['id'])
            solr_data += '<query>id:%s</query>' % doc['id']
        delete_set = ",".join(delete_set)
        print "Deleting %s" % delete_set
        data = '<delete>%s</delete>' % solr_data
        r = urllib2.Request(conf.settings.SOLR_URL + 'update?commit=true')
        r.add_header('Content-Type', 'text/xml')
        r.add_data(data)
        f = urllib2.urlopen(r)
        print "Solr response to deletion request"
        print f.read()
    if new:
        data = '<delete><query>*:*</query></delete>'
        r = urllib2.Request(conf.settings.SOLR_URL + 'update?commit=true')
        r.add_header('Content-Type', 'text/xml')
        r.add_data(data)
        f = urllib2.urlopen(r)
        print "Solr response to deletion request:"
        print f.read()
    if expired:
        data = '<delete><query>%s</query></delete>' % conf.settings.EXPIRED_RECORDS_QUERY
        r = urllib2.Request(conf.settings.SOLR_URL + 'update?commit=true')
        r.add_header('Content-Type', 'text/xml')
        r.add_data(data)
        f = urllib2.urlopen(r)
        print "Solr response to deletion request for records with a cat date older than the time specified in settings.py."
        print f.read()
    if not_updated:
        data = '<delete><query>%s</query></delete>' % conf.settings.NOT_UPDATED_RECORDS_QUERY
        r = urllib2.Request(conf.settings.SOLR_URL + 'update?commit=true')
        r.add_header('Content-Type', 'text/xml')
        r.add_data(data)
        f = urllib2.urlopen(r)
        print "Solr response to deletion request for records not updated since the time specified in settings.py."
        print f.read()
    if optimize:
        print "Will optimize: %s." % conf.settings.SOLR_URL
        data = '<optimize/>'
        r = urllib2.Request(conf.settings.SOLR_URL + 'update?commit=true')
        r.add_header('Content-Type', 'text/xml')
        r.add_data(data)
        f = urllib2.urlopen(r)
        print "Solr response to optimize request."
        print f.read()
    if file_or_urls:
        parser = options.get('parser')
        module = None
        if parser:
            if parser.endswith('.py'):
                parser = parser[:-3]
            module = __import__('kochief.discovery.parsers.' + parser,
                    globals(), locals(), [parser])
        for file_or_url in file_or_urls:
            if not module:
                # guess parser based on file extension
                if file_or_url.endswith('.mrc'):
                    import kochief.discovery.parsers.marc as module
            if not module:
                raise mb.CommandError("Please specify a parser.")
            print "Converting %s to CSV ..." % file_or_url
            t1 = time.time()
            try:
                data_handle = urllib.urlopen(file_or_url)
            except IOError:
                # On Windows, urllib can fail to open a local file.
                data_handle = open(file_or_url)
            # On Windows, open in binary mode or blank rows appear in the CSV;
            # open before the try block so finally always has a handle to close.
            csv_handle = open(CSV_FILE, 'wb')
            try:
                record_count = module.write_csv(data_handle, csv_handle,
                        collections=options.get('collections'))
            finally:
                csv_handle.close()
            t2 = time.time()
            load_solr(CSV_FILE)
            t3 = time.time()
            os.remove(CSV_FILE)
            p_time = (t2 - t1) / 60
            l_time = (t3 - t2) / 60
            t_time = p_time + l_time
            rate = record_count / (t3 - t1)
            print """Processing took %0.3f minutes.
Loading took %0.3f minutes.
That's %0.3f minutes total for %d records,
at a rate of %0.3f records per second.
""" % (p_time, l_time, t_time, record_count, rate)
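Each Solr operation in this last revision repeats the same Request/add_header/add_data/urlopen boilerplate. A behavior-preserving refactor sketch follows; the helper name is made up, not part of the original code.

# Hypothetical helper factoring out the repeated Solr POST boilerplate.
def post_to_solr(data, message):
    r = urllib2.Request(conf.settings.SOLR_URL + 'update?commit=true')
    r.add_header('Content-Type', 'text/xml')
    r.add_data(data)
    f = urllib2.urlopen(r)
    print message
    print f.read()

# The --new branch, for example, collapses to:
#     post_to_solr('<delete><query>*:*</query></delete>',
#                  "Solr response to deletion request:")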