def update(args, FIELDS_URL):
    """Modify properties of an existing Field.

    The parsed options must contain a 'name' entry identifying the
    Field; the remaining key/value pairs are sent as the PUT payload.
    """
    data = parse_opts(args)
    if 'name' not in data:
        raise Exception("Updating a Field requires a name")
    # pop the name out so only the properties to change are sent
    name = data.pop('name')
    json_http(FIELDS_URL + "/" + name, method='PUT', data=data)
    print("Updated Field: " + name)
def update(args, DS_URL):
    """Modify properties of a datasource identified by id or name."""
    data = parse_opts(args)
    ds_id = get_id(data)
    if ds_id is None:
        raise Exception(
            "Updating a DataSource requires either an id or a name")
    json_http(DS_URL + "/" + ds_id, method='PUT', data=data)
    print("Updated DataSource #" + ds_id)
def delete(args, FIELDS_URL):
    """Remove a Field given exactly one name argument."""
    if len(args) != 1:
        raise Exception("Deleting a Field requires a name")
    name = args.pop(0)
    # be helpful if the caller used the explicit "name=<value>" syntax
    if name.startswith('name='):
        name = name[len('name='):]
    json_http(FIELDS_URL + "/" + name, method='DELETE')
    print("Deleted Field: " + name)
def schedule(args, DS_URL):
    """Modify the schedule of a datasource identified by id or name."""
    data = parse_opts(args)
    ds_id = get_id(data)
    if ds_id is None:
        raise Exception(
            "Modifying the schedule of a DataSource requires either an id or a name"
        )
    json_http(DS_URL + "/" + ds_id + '/schedule', method='PUT', data=data)
    print("Updated Schedule of DataSource #" + ds_id)
def delete(args, DS_URL):
    """Remove a datasource identified by id or name."""
    if len(args) != 1:
        raise Exception("wrong number of args for deleting a DataSource")
    ds_id = get_id(parse_opts(args))
    if ds_id is None:
        raise Exception(
            "Must supply either an id or a name for a DataSource to delete it")
    json_http(DS_URL + '/' + ds_id, method='DELETE')
    print("Deleted DataSource #" + ds_id)
def create_filters(filters, users):
    """Create one access-control role per ";"-separated filter spec.

    Each spec is expected to look like "rolename=uid1,uid2=query"
    (role name, then comma-separated user ids, then the filter query),
    and is POSTed to the collection's /roles endpoint.

    NOTE(review): the 'users' parameter is never used here, and
    'filters' is sent as a bare string while the API examples in the
    original comments show a list (e.g. ["status:public"]) -- confirm
    against the roles API.
    """
    for spec in filters.split(";"):
        print("Applying filter: " + spec)
        parts = spec.split("=")
        # parts[0] = role name, parts[1] = comma-separated uids, parts[2] = query
        role_users = [uid for uid in parts[1].split(",")]
        data = {"name": parts[0], "users": role_users, "filters": parts[2]}
        print("Sending Data to:" + COL_URL + "/roles")
        print(data)
        result = lweutils.json_http(COL_URL + "/roles", method="POST", data=data)
        print("Result:")
        print(result)
def get_id(opts, DS_URL=None):
    """Resolve the id of the datasource the client is interested in.

    The id is taken directly from opts['id'] when present; otherwise,
    when opts['name'] is given, the datasource listing at DS_URL is
    searched for a unique match.  Whichever key is used is removed
    from opts, so the remaining entries can be sent as a payload.

    :param opts: dict of parsed command-line options; mutated in place.
    :param DS_URL: base URL of the datasource API.  Defaults to None so
        that callers which pass an explicit id keep working -- several
        callers in this code base invoke get_id(opts) with one argument,
        which previously raised TypeError.  Required for name lookup.
    :returns: the id as a string, or None when neither key is present.
    :raises Exception: when a name matches zero or multiple datasources,
        or when a name lookup is requested without a DS_URL.
    """
    if 'id' in opts:
        return str(opts.pop('id'))
    if 'name' in opts:
        name = opts.pop('name')
        if DS_URL is None:
            raise Exception(
                "Can't look up a DataSource by name without a DS_URL")
        ids = [ds['id'] for ds in json_http(DS_URL)
               if 'name' in ds and ds['name'] == name]
        if not ids:
            raise Exception("Can't locate a DataSource with name=" + name)
        if len(ids) != 1:
            raise Exception("Multiple DataSource's found with name=" + name +
                            ": " + str(ids))
        return str(ids.pop())
    return None
def status(args, DS_URL):
    """Display the status of one datasource (by id/name) or of all of them."""
    if len(args) > 1:
        raise Exception("wrong number of args for showing DataSource status")
    ds_id = get_id(parse_opts(args))
    if ds_id is None:
        url = DS_URL + "/all/status"
        data = json_http(url)
        print("Status of All DataSources: " + url + " => " + pretty_json(data))
    else:
        url = DS_URL + '/' + ds_id + '/status'
        data = json_http(url)
        print("Status of DataSource #" + ds_id + ": " + url + " => " + pretty_json(
            data))
def show(args, DS_URL):
    """Display current datasources (all of them, or one by id/name)."""
    if len(args) > 1:
        raise Exception("wrong number of args for showing a DataSource")
    ds_id = get_id(parse_opts(args))
    if ds_id is not None:
        print_ds(json_http(DS_URL + '/' + ds_id))
        return
    print('Data Sources: ' + DS_URL)
    data = json_http(DS_URL)
    if not data:
        print(' (none)')
    else:
        for ds in data:
            print_ds(ds, ' ')
def print_ds(data, DS_URL, indent=''):
    """Pretty-print one datasource together with its status and schedule.

    NOTE(review): a sibling show() calls print_ds(ds, ' '), passing an
    indent string where DS_URL is declared -- confirm which signature
    the original modules intended.
    """
    ds_id = str(data['id'])
    base = DS_URL + '/' + ds_id
    print(indent + "Data Source #" + ds_id + ': ')
    indent = indent + ' '
    print(indent + "Info: " + base + ' => ' + pretty_json(data, indent))
    status_url = base + "/status"
    print(indent + "Status: " + status_url + " => " + pretty_json(
        json_http(status_url), indent))
    sched_url = base + "/schedule"
    print(indent + "Schedule: " + sched_url + " => " + pretty_json(
        json_http(sched_url), indent))
def show(args, SETTINGS_URL):
    """Display index settings, optionally restricted to the named keys."""
    label = 'Index Settings'
    url = SETTINGS_URL
    if args:
        label += ' (' + ', '.join(args) + ')'
        url += '/' + ','.join(args)
    data = json_http(url)
    print(label + ': ' + url + ' => ' + pretty_json(data))
def show(args, COL_URL):
    """Display current collection info, optionally for the named items."""
    label = 'Collection Info'
    url = COL_URL + "/info"
    if args:
        label += ' (' + ', '.join(args) + ')'
        url += '/' + ','.join(args)
    print(label + ": " + url + " => " + pretty_json(json_http(url)))
def create_collection(name):
    """Create a new collection called *name* via the REST API.

    Failures are reported by printing the stack trace but are not
    propagated, so one failed create does not abort a batch of creates.
    """
    data = {"name": name}
    try:
        print("Trying: " + name)
        # response body is not needed; success is reported by name below
        lweutils.json_http(API_URL + "/collections", method='POST', data=data)
        print("Created New Collection: " + data['name'])
    except Exception:
        traceback.print_exc()
def create(args, FIELDS_URL):
    """Create a field; the options must include 'name' and 'field_type'."""
    data = parse_opts(args)
    for required in ('name', 'field_type'):
        if required not in data:
            raise Exception("Creating a Field requires a " + required)
    json_http(FIELDS_URL, method='POST', data=data)
    print("Created New Field: " + data['name'] + " at: " + FIELDS_URL)
def show(args, FIELDS_URL):
    """Display all fields, or a single field when a name is given."""
    if len(args) > 1:
        raise Exception("wrong number of args for showing fields")
    if args:
        name = args.pop(0)
        # be helpful if the caller used the explicit "name=<value>" syntax
        if name.startswith('name='):
            name = name[len('name='):]
        url = FIELDS_URL + "/" + name
        print("Field " + name + ": " + url + " => " + pretty_json(
            json_http(url), ' '))
    else:
        print('Fields: ' + FIELDS_URL)
        data = json_http(FIELDS_URL)
        if not data:
            print(' (none)')
        else:
            for field in data:
                name = field['name']
                url = FIELDS_URL + '/' + name
                print(" Field: " + name + ": " + url + " => " + pretty_json(
                    field, ' '))
def create(args, DS_URL, added_data=None):
    """Create a datasource; options must include name, type and crawler.

    :param added_data: optional dict merged into the payload last, so it
        can override the parsed options.
    :returns: the id of the newly created datasource.
    """
    data = parse_opts(args)
    for required in ('name', 'type', 'crawler'):
        if required not in data:
            raise Exception("Creating a DataSource requires a " + required)
    if added_data:
        data.update(added_data)
    rsp = json_http(DS_URL, method='POST', data=data)
    print("Created New DataSource: " + str(rsp['id']) +
          " with name: " + data['name'] + " at " + DS_URL)
    return rsp['id']
def history(args, DS_URL):
    """Display the indexing history of a datasource (by id or name)."""
    if len(args) != 1:
        raise Exception("wrong number of args for showing a DataSource")
    ds_id = get_id(parse_opts(args))
    if ds_id is None:
        raise Exception(
            "Must supply either an id or a name to view the indexing history of a DataSource"
        )
    url = DS_URL + "/" + ds_id + '/history'
    print("History of DataSource #" + ds_id + ": " + url + " => " + pretty_json(
        json_http(url)))
def create_press_crawler(stock):
    """Create and start a web datasource crawling Yahoo press releases
    for *stock*, returning the new datasource id."""
    url = "http://finance.yahoo.com/q/p?s=" + stock + "+Press+Releases"
    # regex include paths restricting the crawl to news and this quote page
    include_paths = [
        "http://finance\.yahoo\.com/news/.*",
        "http://finance\.yahoo\.com/q/p\?s=" + stock + "+Press+Releases"
    ]
    ds_id = ds.create([
        "name=PressRelease_" + stock, "type=web", "bounds=none",
        "url=" + url, "crawler=lucid.aperture", "crawl_depth=2",
        "include_paths=" + include_paths[0],
        "include_paths=" + include_paths[1]
    ], DS_URL)
    # kick off the crawl job immediately
    lweutils.json_http(COL_URL + "/datasources/" + ds_id + "/job",
                       method="PUT")
    return ds_id
def add_twitter(i, stock_lists, stocks, access_token, consumer_key,
                consumer_secret, token_secret):
    """Create and start a Twitter-stream datasource tracking the symbols
    in *stock_lists* (cashtag plus company name for each)."""
    args = [
        "name=Twitter_" + str(i), "access_token=" + access_token,
        "consumer_key=" + consumer_key,
        "consumer_secret=" + consumer_secret,
        "token_secret=" + token_secret, "type=twitter_stream",
        "crawler=lucid.twitter.stream", "sleep=10000"
    ]
    print(stock_lists)
    symbols = ""
    for symbol in stock_lists:
        # track both the cashtag and the company name for each symbol
        symbols += "$" + symbol + ", " + stocks[symbol][1] + ", "
    # trim only the trailing space; the trailing comma is kept, as before
    args.append("filter_track=" + symbols[:-1])
    data = {"mapping": create_twitter_mappings()}
    ds_id = ds.create(args, DS_URL, data)
    # start the streaming job
    lweutils.json_http(COL_URL + "/datasources/" + ds_id + "/job",
                       method="PUT")
# NOTE(review): the three segments below are one Flask view function
# ("standard": query parsing, Solr request building, role lookup, paging,
# render) whose body has been garbled by whitespace collapse and apparent
# redaction (e.g. the '******' around the user/roles lookup, the stray
# Java-style fragment '1 : 0);', and inline '#' comments that now comment
# out collapsed code).  Left byte-identical pending recovery of the
# original formatting -- do not run as-is.
def standard(name=None): # do a match all request query = "*:*" start = 0 user = None sort_criteria = None if request.method == 'POST' and 'search_box' in request.form: query = request.form['search_box'] else: if request.args.get('q'): query = request.args.get('q') if request.method == 'POST' and "user" in request.form: user = request.form['user'] else: if request.args.get('user'): user = request.args.get('user') if request.args.get('start'): start = request.args.get('start') fq = [] if request.args.get('fq'): fq = request.args.getlist('fq') active = "Results" if request.args.get('active'): active = request.args.get('active') if request.args.get('sort_criteria'): sort_criteria = request.args.get('sort_criteria') dsn_results = "data_source_name:HistoricalPrices" source_filters = [] group = "false" group_field = "symbol" if active == "Results": source_filters.append("-" + dsn_results) else: #Historical, do grouping source_filters.append(dsn_results) group = "true" if sort_criteria == None: sort_criteria = "trade_date" # &facet.date=timestamp&facet.date.start=2013-10-08T14:17:49.04Z&facet.date.end=NOW/DAY%2B1DAY&facet.date.gap=%2B1HOUR app.logger.info("Query: " + query) kwargs = { "qt": "/lucid", "facet": "true", "start": start, "fl": "*,score", "facet.date": "timestamp", "facet.date.start": "NOW/DAY-30DAY", "facet.date.end": "NOW/DAY+1DAY", "facet.date.gap": "+1DAY", "facet.date.other": "all", "facet.range": ["open", "close", "volume"], "facet.range.start": "0", "facet.range.end": "1000", "facet.range.gap": "100", "facet.range.other": "all", "facet.mincount": "1", "f.open.facet.limit": "5", "f.close.facet.limit": "5", "f.close.open.limit": "5", "f.volume.facet.limit": "5", "f.volume.facet.range.gap": "500000", "f.volume.facet.range.start": "10000", "f.volume.facet.range.end": "5000000", "facet.pivot": ["open,close,volume", "attr_retweetcount,attr_username"], "stats": "true", "stats.field": ["open", "close", "volume"], "fq": source_filters } if fq: kwargs['fq'] = fq 
# (continuation: grouping/sort params, per-user role lookup -- partially
# redacted -- Solr query execution, and paging arithmetic)
#the_role = "DEFAULT" if sort_criteria: kwargs['group.sort'] = sort_criteria + " desc" if active == "Historical": kwargs['group'] = group kwargs['group.field'] = group_field kwargs['group.limit'] = 30 if user and user != 'none': kwargs['user'] = user # we have a user, let's see what roles they play #/api/collections/collection/roles/role print "User: "******"/roles", method="GET") #{u'groups': [], u'users': [u'admin'], u'filters': [u'*:*'], u'name': u'DEFAULT'} #{u'groups': [], u'users': [u'user.10'], u'filters': [u'symbol:AES'], u'name': u'user10'} for role in roles: #print user + ", " + role['users'][0] for role_user in role['users']: if user == role_user: #print role['name'] kwargs['role'] = role[ 'name'] #TODO: Handle multiple roles? params = {'q': query} params.update(kwargs) solr_rsp = solr._select(params) result = solr.decoder.decode(solr_rsp) response = result.get('response') or {} facets = result.get('facet_counts') or {} stats = result.get('stats') or {} grouped = result.get("grouped") highlights = result.get("highlighting") #app.logger.info("Facets: " + facets) numFound = response.get('numFound', 0) result_kwargs = process_solr_rsp(result) results = Results(response.get('docs', ()), numFound, **result_kwargs) page_count = int(math.ceil(numFound / 10.0)) start = response.get('start', 0) current_page_number = int(math.ceil(start / 10.0)) if page_count > 0: current_page_number += 1 else: current_page_number = 1 page_count = 1 #page_count = (int) Math.ceil(results_found / (double) results_per_page); #current_page_number = (int) Math.ceil(start / (double) results_per_page) + (page_count > 0 ? 
# (continuation: stray fragment from a translated Java comment, then
# filter-URL construction, prev/next paging URLs, and template render)
1 : 0); # #app.logger.info("Saw {0} result(s).".format(len(results))) next_start = start + 10 prev_start = max(start - 10, 0) filter_urls = {} if fq: i = 0 filter_base_url = url_for('standard', start=str(start), q=query) for outer in fq: filter_urls[outer] = filter_base_url for inner in fq: if outer != inner: app.logger.info("Inner: " + inner) filter_urls[outer] += "&fq=" + inner i += 1 current_url = url_for('standard', start=str(start), q=query, fq=fq, active=active) results_url = url_for('standard', start=str(start), q=query, fq=fq, active="Results") historical_url = url_for('standard', start=str(start), q=query, fq=fq, active="Historical") next_url = url_for('standard', start=str(next_start), q=query, fq=fq, active=active) prev_url = url_for('standard', start=str(prev_start), q=query, fq=fq, active=active) app.logger.info("Next: " + next_url) return render_template('standard.jinja2', name=name, search_results=results, fq=fq, the_user=user, grouped=grouped, active=active, filter_urls=filter_urls, raw_response=response, start=start, current_url=current_url, historical_url=historical_url, results_url=results_url, the_facets=facets, the_stats=stats, the_query=query, current_page=current_page_number, next_url=next_url, prev_url=prev_url, the_page_count=page_count, highlights=highlights, users=users, sort_criteria=sort_criteria)
def update(args, SETTINGS_URL):
    """Modify index settings from the parsed key=value arguments."""
    payload = parse_opts(args)
    json_http(SETTINGS_URL, method='PUT', data=payload)
    print("Updated Settings")