def do_apropos(self, params):
    definitions = self.client_prog_inst.create_completion_list("define")
    index = self.client_prog_inst.create_completion_list("index")
    guids = self.client_prog_inst.create_completion_list("guid")
    definitions_results = utils.unique_list()
    index_results = utils.unique_list()
    guids_results = utils.unique_list()
    search_for = params.split()
    work_list = ((definitions, definitions_results), (index, index_results), (guids, guids_results))
    for param in search_for:
        for id_list, results in work_list:
            for identifier in id_list:
                found_it = re.search(param, identifier, flags=re.IGNORECASE)
                if found_it:
                    results.append(identifier)
    print("variables:")
    if definitions_results:
        for var in definitions_results:
            print("    ", var)
    else:
        print("    no matching variables were found")
    print("index items:")
    if index_results:
        for iid in index_results:
            print("    ", iid)
    else:
        print("    no matching iids were found")
    print("guids:")
    if guids_results:
        for guid in guids_results:
            iids_of_guids = self.client_prog_inst.items_table.get_iids_with_specific_detail_values("guid", guid)
            print("    ", guid, iids_of_guids)
    else:
        print("    no matching guids were found")
def do_common(self, params):
    iids = shlex.split(params)
    missing_iids = utils.unique_list()  # [iid in iids if iid not in ]
    for iid in iids:
        if iid not in self.client_prog_inst.install_definitions_index:
            missing_iids.append(iid)
    if missing_iids:
        print("Could not find in index:", ", ".join(missing_iids))
    else:
        all_needs = list()
        all_needed_by = list()
        for iid in iids:
            needs_list = utils.unique_list()
            self.client_prog_inst.needs(iid, needs_list)
            all_needs.append(needs_list)
            all_needed_by.append(self.client_prog_inst.needed_by(iid))
        needs_result = set(all_needs[0]).intersection(*all_needs)
        needed_by_result = set(all_needed_by[0]).intersection(*all_needed_by)
        if "__ALL_ITEMS_IID__" in needed_by_result:
            needed_by_result.remove("__ALL_ITEMS_IID__")
        if not needs_result:
            needs_result.add("no one")
        print("common needs:\n    ", ", ".join(needs_result))
        if not needed_by_result:
            needed_by_result.add("no one")
        print("common needed by:\n    ", ", ".join(needed_by_result))
def __init__(self):
    self.__original_install_items = utils.unique_list()
    self.__root_install_items = utils.unique_list()
    self.__update_install_items = utils.unique_list()
    self.__full_install_items = utils.unique_list()
    self.__orphan_install_items = utils.unique_list()
    self.__install_items_by_target_folder = defaultdict(utils.unique_list)
    self.__no_copy_items_by_sync_folder = defaultdict(utils.unique_list)
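# All of these snippets rely on a unique_list helper (utils.unique_list in some
# projects, a bare unique_list import in others): a container or function that keeps
# insertion order while dropping duplicates, and that also works as a zero-argument
# factory for defaultdict. The class below is a minimal sketch of that assumed
# behaviour, not the actual implementation used by any of these projects.
class unique_list(list):
    """List subclass that keeps insertion order but silently skips items it already holds."""

    def __init__(self, iterable=()):
        super().__init__()
        self.extend(iterable)

    def append(self, item):
        if item not in self:        # O(n) membership test; fine for the small lists used here
            super().append(item)

    def extend(self, iterable):
        for item in iterable:
            self.append(item)

# quick usage check of the sketch
ul = unique_list(["a", "b", "a"])
ul.append("b")
ul.extend(["c", "a"])
print(ul)                           # ['a', 'b', 'c'] - duplicates dropped, order preserved
from collections import defaultdict
groups = defaultdict(unique_list)   # also usable as a defaultdict factory, like utils.unique_list
groups["k"].append("x")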
def __init__(self, iid):
    self.__resolved_inherit = False
    self.__iid = iid
    self.__name = ""
    self.__guids = utils.unique_list()
    self.__remark = ""
    self.__description = ""
    self.__inherit_from = utils.unique_list()
    # reading for all platforms ('common') or for which specific platforms ('Mac', 'Win')?
    self.__install_for_os_stack = [InstallItem.os_names[0]]
    self.__items = defaultdict(InstallItem.create_items_section)
    self.__var_list = None
    self.__user_data = None
    self.__last_require_repo_rev = 0
def do_depend(self, params):
    if params:
        for param in shlex.split(params):
            if param not in self.client_prog_inst.install_definitions_index:
                print(text_with_color(param, 'green'), "not in index")
                continue
            needs_list = utils.unique_list()
            self.client_prog_inst.needs(param, needs_list)
            if not needs_list:
                needs_list = ("no one",)
            depend_text_list = list()
            for depend in needs_list:
                if depend.endswith("(missing)"):
                    depend_text_list.append(text_with_color(depend, 'red'))
                else:
                    depend_text_list.append(text_with_color(depend, 'yellow'))
            print(text_with_color(param, 'green'), "needs:\n    ", ", ".join(depend_text_list))
            needed_by_list = self.client_prog_inst.needed_by(param)
            if needed_by_list is None:
                print("could not get needed by list for", text_with_color(param, 'green'))
            else:
                if not needed_by_list:
                    needed_by_list = ("no one",)
                needed_by_list = [text_with_color(needed_by, 'yellow') for needed_by in needed_by_list]
                print(text_with_color(param, 'green'), "needed by:\n    ", ", ".join(needed_by_list))
    return False
def _put(self):
    try:
        self._validate_role(self.body)
    except ValidationException as exc:
        return self._respond(message=str(exc), status=400)

    response = self.db.get(self.db.current_user())
    try:
        roles = response.response['Item']['roles']
    except (KeyError, TypeError):
        return self._respond(message='Not Found', status=404)

    if self.path_parameters['name'] not in [r['name'] for r in roles]:
        return self._respond(message='Not Found', status=404)

    for value in self.body['values']:
        if not self._is_value_defined(value, response):
            return self._respond(message=f'Undefined value "{value}"', status=400)

    for role in roles:
        if role['name'] == self.path_parameters['name']:
            role.update({
                'name': self.body['name'],
                'values': self.body['values'],
                'aliases': unique_list(self.body.get('aliases', [])),
            })

    response = self.db.update(self.db.current_user(), {'roles': roles})
    return self._respond(message=response.message, status=response.status)
def get_reactions_list(self) -> List[str]:
    if self.is_simple_emoji_or_textual_reaction:
        return [self.text]
    elif self.is_many_reactions:
        return unique_list(find_emojis_in_str(self.text))
    elif self.is_custom_reaction:
        return [cast(str, extract_custom_reaction(self.text))]
    else:
        raise ValueError("Can't extract reaction")
def __init__(self, initial_vars) -> None:
    super().__init__(initial_vars)
    self.total_self_progress: int = 30000
    self.read_defaults_file(super().__thisclass__.__name__)
    self.action_type_to_progress_message = None
    self.__all_iids_by_target_folder = defaultdict(utils.unique_list)
    self.__no_copy_iids_by_sync_folder = defaultdict(utils.unique_list)
    self.auxiliary_iids = utils.unique_list()
    self.main_install_targets = list()
def __init__(self, initial_vars):
    super().__init__(initial_vars)
    self.total_self_progress = 1000
    self.need_items_table = True
    self.need_info_map_table = True
    self.read_name_specific_defaults_file(super().__thisclass__.__name__)
    self.action_type_to_progress_message = None
    self.__all_iids_by_target_folder = defaultdict(utils.unique_list)
    self.__no_copy_iids_by_sync_folder = defaultdict(utils.unique_list)
    self.auxiliary_iids = utils.unique_list()
    self.main_install_targets = list()
def compact_history():
    if hasattr(readline, "replace_history_item"):
        unique_history = utils.unique_list()
        for index in reversed(list(range(1, readline.get_current_history_length()))):
            hist_item = readline.get_history_item(index)
            if hist_item:  # some history items are None (usually at index 0)
                unique_history.append(hist_item)
        unique_history.reverse()
        for index in range(len(unique_history)):
            readline.replace_history_item(index + 1, unique_history[index])
        for index in reversed(list(range(len(unique_history) + 1, readline.get_current_history_length()))):
            readline.remove_history_item(index)
def repr_require_for_yaml(self):
    translate_detail_name = {'require_version': 'version',
                             'require_guid': 'guid',
                             'require_by': 'require_by'}
    retVal = defaultdict(dict)
    require_details = self.items_table.get_details_by_name_for_all_iids("require_%")
    for require_detail in require_details:
        item_dict = retVal[require_detail['owner_iid']]
        translated_name = translate_detail_name[require_detail['detail_name']]
        if translated_name not in item_dict:  # check the translated key, not the raw detail name, so values accumulate
            item_dict[translated_name] = utils.unique_list()
        item_dict[translated_name].append(require_detail['detail_value'])
    for item in retVal.values():
        for sub_item in item.values():
            sub_item.sort()
    return retVal
def calculate_full_doit_items_set(self, instlObj):
    """ Calculate the set of iids to install by starting with the root set and adding all dependencies.
        The initial list of iids should already be in self.root_doit_items.
        If no install item is found for an iid, the iid is added to the orphan set.
    """
    root_install_iids_translated = utils.unique_list()
    for root_IID in self.root_doit_items:
        # if root_IID is a guid, iids_from_guids will translate it to iids; otherwise it returns the IID itself
        iids_from_the_root_iid = iids_from_guids(instlObj.install_definitions_index, root_IID)
        for IID in iids_from_the_root_iid:
            if IID in instlObj.install_definitions_index:
                root_install_iids_translated.append(IID)
            else:
                self.orphan_doit_items.append(IID)
    self.full_doit_items = root_install_iids_translated
def find_cmd_tool(self, tool_to_find_var_name):
    """ Locate the path to a cmd.exe tool on Windows; if found, put the full path in the variable.
    :param tool_to_find_var_name: variable name holding the tool name or the full path to the tool
    :return: the path to the tool, or None if it was not found
    """
    tool_path = None
    if tool_to_find_var_name in var_stack:
        original_tool_value = var_stack.ResolveVarToStr(tool_to_find_var_name)

        # first try the variable, could be that the tool was already found
        if os.path.isfile(original_tool_value):
            tool_path = original_tool_value

        if tool_path is None:
            # next try to ask the system using the where command
            try:
                where_tool_path = subprocess.check_output("where " + original_tool_value).strip()
                where_tool_path = utils.unicodify(where_tool_path)
                if os.path.isfile(where_tool_path):
                    tool_path = where_tool_path
                    var_stack.set_var(tool_to_find_var_name, "find_cmd_tool").append(tool_path)
            except Exception:
                pass  # never mind, we'll try on our own

        if tool_path is None:
            win_paths = utils.unique_list()
            # try to find the tool in the PATH variable
            if "PATH" in os.environ:
                # remove newline characters that might lurk in the path (see tech support case 143589)
                adjusted_path = re.sub('[\r\n]', "?", utils.unicodify(os.environ["PATH"]))
                win_paths.extend(adjusted_path.split(";"))
            else:
                print("PATH was not found in environment variables")
            # also add some known locations in case the user's PATH variable was altered
            if "SystemRoot" in os.environ:
                system_root = utils.unicodify(os.environ["SystemRoot"])
                known_locations = (os.path.join(system_root, "System32"),
                                   os.path.join(system_root, "SysWOW64"))
                win_paths.extend(known_locations)
            for win_path in win_paths:
                tool_path = os.path.join(win_path, original_tool_value)
                if os.path.isfile(tool_path):
                    var_stack.set_var(tool_to_find_var_name, "find_cmd_tool").append(tool_path)
                    break
            else:  # break was not called, tool was not found
                tool_path = None
    return tool_path
def parse_senses(results):
    senses = []
    exs = []
    for r in results:
        if r.get("senses"):
            for s in r["senses"]:
                s["part_of_speech"] = r.get("part_of_speech")
                s["definition"] = s.get("definition") or ''
                if s.get("definition") and type(s["definition"]) != list:
                    s["definition"] = [s["definition"]]
                else:
                    senses.append(s)
                examples = s.get("examples") or []
                for example in examples:
                    if example and example.get("text"):
                        exs.append(example["text"])
    return senses, unique_list(exs)
def accumulate_unique_actions(self, action_type, iid_list):
    """ accumulate action_type actions from iid_list, eliminating duplicates """
    unique_actions = utils.unique_list()  # unique_list will eliminate identical actions while keeping the order
    for IID in iid_list:
        with self.install_definitions_index[IID].push_var_stack_scope() as installi:
            action_var_name = "iid_action_list_" + action_type
            item_actions = var_stack.ResolveVarToList(action_var_name, default=[])
            num_unique_actions = 0
            for an_action in item_actions:
                len_before = len(unique_actions)
                unique_actions.append(an_action)
                len_after = len(unique_actions)
                if len_before < len_after:  # add progress only for the first occurrence of the same action
                    num_unique_actions += 1
                    action_description = self.action_type_to_progress_message[action_type]
                    if num_unique_actions > 1:
                        action_description = " ".join((action_description, str(num_unique_actions)))
                    unique_actions.append(
                        self.platform_helper.progress("{installi.name} {action_description}".format(**locals())))
    self.batch_accum += unique_actions
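# A small demonstration of the len-before/len-after idiom used in accumulate_unique_actions
# above: appending to a unique_list and comparing lengths tells you whether the item was
# seen for the first time. This reuses the unique_list sketch given earlier in this listing
# (an assumption, not the real utils.unique_list), and stand-in action strings.
actions = unique_list()
for an_action in ["mkdir x", "copy a b", "mkdir x"]:
    len_before = len(actions)
    actions.append(an_action)          # duplicates are silently dropped
    if len(actions) > len_before:      # length grew, so this action is new
        print("first occurrence of:", an_action)
# prints "mkdir x" and "copy a b" once each; the repeated "mkdir x" is skipped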
def __init__(self, data, order_by=None, visible_columns=(), column_order=()):
    """Create a new table instance with the iterable ``data``.

    If ``order_by`` is specified, the data will be sorted accordingly.

    Note that unlike a ``Form``, tables are always bound to data. Also unlike a
    form, the ``columns`` attribute is read-only and returns ``BoundColumn``
    wrappers, similar to the ``BoundField``s you get when iterating over a form.
    This is because the table iterator already yields rows, and we need an
    attribute via which to expose the (visible) set of (bound) columns -
    ``Table.columns`` is simply the perfect fit for this. Instead,
    ``base_columns`` is copied to table instances, so modifying that will not
    touch the class-wide column list.
    """
    self._data = data
    self._snapshot = None  # will store output dataset (ordered...)
    self._rows = Rows(self)
    self._columns = Columns(self)
    self.order_by = order_by

    # Make a copy so that modifying this will not touch the class
    # definition. Note that this is different from forms, where the
    # copy is made available in a ``fields`` attribute. See the
    # ``Table`` class docstring for more information.
    self.base_columns = copy.deepcopy(self.base_columns)
    keys_order = list(column_order) + list(self.base_columns.keys())
    self.base_columns.keyOrder = unique_list(keys_order)

    if visible_columns:
        # set visibility only if visible_columns are specified
        for fname in self.base_columns.keys():
            if fname not in visible_columns:
                self.base_columns[fname].visible = False
    if self.always_visible_cols:
        for fname in self.base_columns.keys():
            if fname in self.always_visible_cols:
                self.base_columns[fname].visible = True
def _post(self):
    try:
        self._validate_role(self.body)
    except ValidationException as exc:
        return self._respond(message=str(exc), status=400)

    response = self.db.get(self.db.current_user())
    try:
        roles = response.response['Item']['roles']
    except (KeyError, TypeError):
        roles = []

    if self.body['name'] in [r['name'] for r in roles]:
        return self._respond(message='Resource already exists', status=400)

    for value in self.body['values']:
        if not self._is_value_defined(value, response):
            return self._respond(message=f'Undefined value "{value}"', status=400)

    roles.append({
        'name': self.body['name'],
        'values': self.body['values'],
        'aliases': unique_list(self.body.get('aliases', [])),
    })
    response = self.db.update(self.db.current_user(), {'roles': roles})
    return self._respond(message=response.message, status=response.status)
def keys(self):
    the_keys = utils.unique_list()
    for a_var_list in reversed(self._ConfigVarList_objs):
        the_keys.extend(list(a_var_list.keys()))
    return list(the_keys)
def __init__(self):
    self.root_doit_items = utils.unique_list()
    self.full_doit_items = utils.unique_list()
    self.orphan_doit_items = utils.unique_list()
    self.doit_items_by_target_folder = defaultdict(utils.unique_list)
    self.no_copy_items_by_sync_folder = defaultdict(utils.unique_list)
def friends_command(update, context):
    '''
    friends => Shows the contact (Telegram @) of every person the user has already connected with.
    '''
    start_t = timer()  # used to measure how long this function takes to complete

    # makes it easier to reference this user below
    myself = update.effective_user.id
    my_data = db.get_user_by_id(myself)

    context.user_data['connections'] = my_data['connections']
    if len(context.user_data['connections']) == 0:
        # this user has no connections yet
        response = "Você ainda não possui nenhuma conexão!\n"
        response += "Que tal usar o comando /show para conhecer alguém novo?"
        update.message.reply_text(response)
        return ConversationHandler.END

    # reaching this point means the user has connections
    connections_set = unique_list(context.user_data['connections'])
    # fix the stored connections in case there are duplicates
    if len(connections_set) < len(context.user_data['connections']):
        # there are duplicates in the original list
        context.user_data['connections'] = list(connections_set)
        db.update_user_by_id(myself, {'connections': context.user_data['connections']})

    bottom_msg = "Utilize esses botões para navegar entre as páginas:\n\n"

    pages_text_list = friends_paginator(connections_set)
    context.user_data['friend_pages'] = pages_text_list
    button_pairs = make_buttons(0, len(pages_text_list) - 1)

    response = pages_text_list[0]
    if len(button_pairs) != 0:
        response += bottom_msg

    # button pairs consist of (button_text, callback_text)
    keyboard = [[
        InlineKeyboardButton(text, callback_data=callback)
        for text, callback in button_pairs
    ]]

    end_t = timer()
    ellapsed_t = end_t - start_t
    db.register_action('friends_command', myself, additional_data={'ellapsed_time': ellapsed_t})

    update.message.reply_text(response, reply_markup=InlineKeyboardMarkup(keyboard))
    return CHOOSE_PAGE
def original_names_from_wtars_names(self, original_list):
    replaced_list = utils.unique_list()
    replaced_list.extend([self.original_name_from_wtar_name(file_name) for file_name in original_list])
    return replaced_list
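# Hedged illustration of why original_names_from_wtars_names deduplicates its output:
# several wrapped-archive part names can map back to the same original name. The
# ".wtar" / ".wtar.aa" naming scheme and this stand-in for original_name_from_wtar_name
# are assumptions for the example, not the project's actual implementation; it also
# reuses the unique_list sketch from earlier in this listing.
import re

def original_name_from_wtar_name_sketch(file_name):
    # strip a trailing ".wtar" or a split-part suffix such as ".wtar.aa" (assumed scheme)
    return re.sub(r"\.wtar(\.[a-z][a-z])?$", "", file_name)

parts = ["plugin.bundle.wtar.aa", "plugin.bundle.wtar.ab", "readme.txt"]
originals = unique_list(original_name_from_wtar_name_sketch(p) for p in parts)
print(list(originals))  # ['plugin.bundle', 'readme.txt'] - both archive parts collapse to one entry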
def form_solr_query(args):
    solr_query = ''

    api_key = None
    if api.PARAM_KEY in args:
        api_key = args[api.PARAM_KEY]
        logging.info('api_key = %s' % api_key)

    # args fix up
    if api.PARAM_START not in args:
        args[api.PARAM_START] = 1
    if api.PARAM_SORT not in args:
        args[api.PARAM_SORT] = "score"

    # Generate geo search parameters
    # TODO: formalize these constants
    # this is near the middle of the continental US
    lat = '37'
    lng = '-95'
    max_dist = 12400
    if args.get(api.PARAM_LAT, None) and args.get(api.PARAM_LNG, None):
        lat = args[api.PARAM_LAT]
        lng = args[api.PARAM_LNG]
        if api.PARAM_VOL_DIST not in args or args[api.PARAM_VOL_DIST] == "":
            args[api.PARAM_VOL_DIST] = DEFAULT_VOL_DIST
        max_dist = args[api.PARAM_VOL_DIST] = int(str(args[api.PARAM_VOL_DIST]))
        if args[api.PARAM_VOL_DIST] < 1:
            args[api.PARAM_VOL_DIST] = DEFAULT_VOL_DIST
        max_dist = float(args[api.PARAM_VOL_DIST])

    if args.get(api.PARAM_INVITATIONCODE, ''):
        max_dist = 20030

    global GEO_GLOBAL
    geo_params = ('{!geofilt}&pt=%s,%s&sfield=latlong&d=%s&d1=0'
                  % (str(lat), str(lng), str(max_dist * 1.609)))
    geo_params += "&bf=recip(geodist(),1,150,10)"
    GEO_GLOBAL = geo_params

    if (args['is_report'] or
            (args.get(api.PARAM_TYPE) and args.get(api.PARAM_TYPE, None) != "all")):
        geo_params = ""
        if args['is_report']:
            GEO_GLOBAL = ''

    # Run our keyword through our categories dictionary to see if we need to adjust our keyword param
    if api.PARAM_CATEGORY in args:
        for key, val in categories.CATEGORIES.iteritems():
            if str(args[api.PARAM_CATEGORY]) == val:
                args[api.PARAM_CATEGORY] = str(key)

    # keyword
    original_query = ''
    query_is_empty = False
    if (api.PARAM_Q in args and args[api.PARAM_Q] != ""):
        original_query = args[api.PARAM_Q]
        qwords = args[api.PARAM_Q].split(" ")
        for qi, qw in enumerate(qwords):
            # it is common practice to use a substr of a url, eg. volunteermatch
            # here we transform that to http://*volunteermatch*
            if qw.find("detailurl:") >= 0 and qw.find("*") < 0:
                ar = qw.split(":")
                if len(ar) > 1:
                    ar[1] = "http*" + ar[1] + "*"
                    qw = ":".join(ar)
                    qwords[qi] = qw
        args[api.PARAM_Q] = ' '.join(qwords)

        # a category in &q means expand to specific terms, as opposed to the
        # solr field 'category' which atm may only be 'vetted'
        args[api.PARAM_Q] = apply_category_query(args[api.PARAM_Q])

        if api.PARAM_CATEGORY in args:
            args[api.PARAM_Q] += (" AND " + args[api.PARAM_CATEGORY])

        solr_query += rewrite_query('*:* AND ' + args[api.PARAM_Q], api_key)
        ga.track("API", args.get(api.PARAM_KEY, 'UI'), args[api.PARAM_Q])
    elif api.PARAM_CATEGORY in args:
        solr_query += rewrite_query('*:* AND ' + args[api.PARAM_CATEGORY], api_key)
        ga.track("API", args.get(api.PARAM_KEY, 'UI'), args[api.PARAM_CATEGORY])
    else:
        # Query is empty, search for anything at all.
        query_is_empty = True
        solr_query += rewrite_query('*:*', api_key)
        ga.track("API", args.get(api.PARAM_KEY, 'UI'), '*:*')

    # geo params go in first
    global KEYWORD_GLOBAL, STATEWIDE_GLOBAL, NATIONWIDE_GLOBAL
    KEYWORD_GLOBAL = urllib.quote_plus(solr_query)
    STATEWIDE_GLOBAL, NATIONWIDE_GLOBAL = geocode.get_statewide(lat, lng)

    solr_query = urllib.quote_plus(solr_query)

    if api.PARAM_TYPE in args and args[api.PARAM_TYPE] != "all":
        # Type: these map to the tabs on the search results page
        # quote plus
        if args[api.PARAM_TYPE] == "self_directed":
            solr_query += urllib.quote_plus(" AND self_directed:true")
        elif args[api.PARAM_TYPE] == "nationwide":
            nationwide_param = args.get('nationwide', '')
            if nationwide_param:
                solr_query += urllib.quote_plus(" AND country:" + nationwide_param)
            solr_query += urllib.quote_plus(" AND micro:false AND self_directed:false")
        elif args[api.PARAM_TYPE] == "statewide":
            statewide_param = args.get('statewide', '')
            if statewide_param:
                solr_query += urllib.quote_plus(" AND state:" + statewide_param)
            else:
                solr_query += urllib.quote_plus(" AND (statewide:" + STATEWIDE_GLOBAL +
                                                " OR nationwide:" + NATIONWIDE_GLOBAL + ")")
            solr_query += urllib.quote_plus(" AND micro:false AND self_directed:false")
        elif args[api.PARAM_TYPE] == "virtual":
            solr_query += urllib.quote_plus(" AND virtual:true AND micro:false AND self_directed:false")
        elif args[api.PARAM_TYPE] == "micro":
            solr_query += urllib.quote_plus(" AND micro:true")
    else:
        # this keeps the non-geo counts out of the refine by counts
        fq = '&fq='
        fq += urllib.quote('self_directed:false AND virtual:false AND micro:false')
        solr_query += fq

    global FULL_QUERY_GLOBAL
    FULL_QUERY_GLOBAL = solr_query

    # Source
    global PROVIDER_GLOBAL
    if api.PARAM_SOURCE in args and args[api.PARAM_SOURCE] != "all":
        PROVIDER_GLOBAL = urllib.quote_plus(" AND provider_proper_name:(" + args[api.PARAM_SOURCE] + ")")
        solr_query += PROVIDER_GLOBAL
    else:
        PROVIDER_GLOBAL = ""

    # for ad campaigns
    if api.PARAM_CAMPAIGN_ID in args:
        # we need to exclude the opted out opportunities
        # they can be tagged as opt_out_all_campaigns
        # or opt_out_campaign_XXX where XXX is the campaign ID.
        exclusion = '!categories:%s !categories:%s' % (
            'optout_all_campaigns',
            'optout_campaign_' + args[api.PARAM_CAMPAIGN_ID])
        # TODO: campaign_ids are per-campaign, but opportunities
        # might prefer to opt out of an entire sponsor.
        # should probably add a 'sponsor_id' to the spreadsheet,
        # and have optout_sponsor_XXX as well.
        solr_query += exclusion

    # set the solr instance we need to use if not given as an arg
    global BACKEND_GLOBAL
    BACKEND_GLOBAL, args = get_solr_backend(args)

    solr_query += apply_boosts(args, original_query)
    solr_query += apply_filter_query(api_key, args)

    group_query = ''
    if args.get(api.PARAM_MERGE, None) == '2':
        group_query = "&group=true&group.field=aggregatefield&group.main=true"
    elif args.get(api.PARAM_MERGE, None) == '3':
        group_query = "&group=true&group.field=opportunityid&group.main=true"
    elif args.get(api.PARAM_MERGE, None) == '4':
        group_query = "&group=true&group.field=dateopportunityidgroup&group.main=true&group.limit=7"
    solr_query += group_query

    # add the geo params
    solr_query += '&fq=' + geo_params

    # add the field list
    fields_query = '&fl='
    if api.PARAM_OUTPUT not in args:
        fields_query += ','.join(api.DEFAULT_OUTPUT_FIELDS)
    else:
        if args[api.PARAM_OUTPUT] in api.FIELDS_BY_OUTPUT_TYPE:
            fields_query += ','.join(utils.unique_list(api.DEFAULT_OUTPUT_FIELDS +
                                                       api.FIELDS_BY_OUTPUT_TYPE[args[api.PARAM_OUTPUT]]))
        else:
            fields_query += '*'

    # TODO: we were getting "URL too long" errors
    fields_query = '&fl=*'

    solr_query += fields_query
    return solr_query, group_query, fields_query
def query(query_url, group_query, fields_query, args, cache, dumping=False):
    """run the actual SOLR query (no filtering or sorting)."""
    logging.debug("Query URL: " + query_url + '&debugQuery=on')
    result_set = searchresult.SearchResultSet(urllib.unquote(query_url), query_url, [])
    result_set.query_url = query_url
    result_set.args = args
    result_set.fetch_time = 0
    result_set.parse_time = 0

    fetch_start = time.time()
    status_code = 999
    ui_query_url = query_url

    api_key = args.get(api.PARAM_KEY, 'UI')
    if api_key == 'UI':
        need_facet_counts = True
    else:
        need_facet_counts = False

    if api_key == 'UI':
        # For UI searches make two queries: one grouped by opportunityid to retrieve the VO IDs,
        # and a second one to retrieve the dates. The reason is that, because of occurrences,
        # pagination cannot be managed solely by rows.
        facetOppsQuery = re.sub(
            'fl=([*,a-z])',
            'fl=opportunityid,feed_providername,event_date_range,title,description,detailurl,latitude,longitude,categorytags&group=true&group.field=opportunityid&group.main=true&group.format=simple',
            ui_query_url)
        try:
            logging.info("calling SOLR for facetOppsQuery: " + facetOppsQuery)
            facetOppsQuery += '&r=' + str(random.random())
            # fetch_result = urlfetch.fetch(facetOppsQuery, deadline=api.CONST_MAX_FETCH_DEADLINE, headers={"accept-encoding": "gzip"},)
            fetch_result = urlfetch.fetch(facetOppsQuery, deadline=api.CONST_MAX_FETCH_DEADLINE)
            logging.info("calling SOLR for facetOppsQuery headers: %s " %
                         str(fetch_result.header_msg.getheaders('content-encoding')))
            status_code = fetch_result.status_code
            # unzip response if it is compressed
            if re.search('gzip', str(fetch_result.header_msg.getheaders('content-encoding'))) and status_code == 200:
                gzip_stream = StringIO(fetch_result.content)
                gzip_file = gzip.GzipFile(fileobj=gzip_stream)
                result_content = gzip_file.read()
            else:
                result_content = fetch_result.content
            result_content = re.sub(r';;', ',', result_content)
            result = simplejson.loads(result_content)
        except:
            # can get a response too large error here
            if status_code == 999:
                logging.warning('solr_search.query error 999 %s' % str(status_code))
            else:
                logging.info('solr_search.query responded %s' % str(status_code))

        doc_list = result["response"]["docs"]
        # logging.info('facetOppsQuery result' + str(doc_list))
        opportunityList = list()  # empty list
        for i, entry in enumerate(doc_list):
            opportunityList.append(entry["opportunityid"])
            # logging.info('opportunityList i=' + str(i) + ": v=" + str(entry))
        opportunityResults = 'opportunityid:(' + '+OR+'.join(opportunityList) + ')'
        logging.info('opportunityList =' + opportunityResults)

        ui_query_url = re.sub('rows=([0-9]+)', 'rows=1000', ui_query_url)
        ui_query_url = re.sub('start=([0-9]+)', 'start=0', ui_query_url)
        ui_query_url = re.sub(
            'fl=([*,a-z])',
            'fl=id,feed_providername,event_date_range,title,description,detailurl,latitude,longitude',
            ui_query_url)
        ui_query_url = ui_query_url.replace('&q=', '&q=' + opportunityResults + '+AND+')

    try:
        logging.info("calling SOLR: " + ui_query_url)
        ui_query_url += '&r=' + str(random.random())
        fetch_result = urlfetch.fetch(ui_query_url, deadline=api.CONST_MAX_FETCH_DEADLINE,
                                      headers={"accept-encoding": "gzip"})
        # fetch_result = urlfetch.fetch(ui_query_url, deadline=api.CONST_MAX_FETCH_DEADLINE,)
        logging.info("calling SOLR headers: %s " %
                     str(fetch_result.header_msg.getheaders('content-encoding')))
        status_code = fetch_result.status_code
        # unzip response if it is compressed
        if re.search('gzip', str(fetch_result.header_msg.getheaders('content-encoding'))) and status_code == 200:
            gzip_stream = StringIO(fetch_result.content)
            gzip_file = gzip.GzipFile(fileobj=gzip_stream)
            result_content = gzip_file.read()
        else:
            result_content = fetch_result.content
        result_content = re.sub(r';;', ',', result_content)
        result = simplejson.loads(result_content)
    except:
        # can get a response too large error here
        if status_code == 999:
            logging.warning('solr_search.query error')
        else:
            logging.info('solr_search.query responded %s' % str(status_code))

    fetch_end = time.time()
    result_set.fetch_time = fetch_end - fetch_start
    if status_code != 200:
        return result_set
    # result_content = fetch_result.content

    parse_start = time.time()
    # undo comma encoding -- see datahub/footprint_lib.py
    # result_content = re.sub(r';;', ',', result_content)
    # result = simplejson.loads(result_content)

    all_facets = None
    if need_facet_counts:
        all_facets = get_geo_counts(args, api_key)

    if not all_facets or "facet_counts" not in all_facets:
        result_set.facet_counts = None
    else:
        facet_counts = dict()
        ks = "self_directed:false AND virtual:false AND micro:false"
        if not args['is_report'] and not args.get(api.PARAM_INVITATIONCODE, None):
            ks += " AND -statewide:[* TO *] AND -nationwide:[* TO *]"
        facet_counts["all"] = int(all_facets["facet_counts"]["facet_queries"][ks])
        facet_counts.update(get_type_counts(args, api_key))
        count = 0
        if api.PARAM_TYPE in args:
            if args[api.PARAM_TYPE] == "statewide":
                count = facet_counts["statewide"]
            elif args[api.PARAM_TYPE] == "virtual":
                count = facet_counts["virtual"]
            elif args[api.PARAM_TYPE] == "self_directed":
                count = facet_counts["self_directed"]
            elif args[api.PARAM_TYPE] == "micro":
                count = facet_counts["micro"]
            else:
                count = facet_counts["all"]
        facet_counts["count"] = count
        result_set.facet_counts = facet_counts

    facets = get_facet_counts(api_key, args)
    result_set.categories = facets['category_fields']
    result_set.providers = facets['provider_fields']

    doc_list = result["response"]["docs"]

    # process json doc list
    for i, entry in enumerate(doc_list):
        if "detailurl" not in entry:
            # URL is required
            latstr = entry["latitude"]
            longstr = entry["longitude"]
            if latstr and longstr and latstr != "" and longstr != "":
                entry["detailurl"] = "http://maps.google.com/maps?q=" + str(latstr) + "," + str(longstr)
            else:
                logging.info('solr_search.query skipping SOLR record'
                             ' %d: detailurl is missing...' % i)
                continue

        url = entry["detailurl"]
        # ID is the 'stable id' of the item generated by base.
        # Note that this is not the base url expressed as the Atom id element.
        item_id = entry["id"]
        # Base URL is the url of the item in base. For Solr we just use the ID hash
        base_url = item_id
        snippet = entry.get('description', '')
        title = entry.get('title', '')
        location = entry.get('location_string', '')

        categories = entry.get('categories', '')
        if type(categories).__name__ != 'list':
            try:
                categories = categories.split(',')
            except:
                categories = []

        vetted = False
        if 'Vetted' in categories:
            vetted = True

        is_501c3 = False
        if entry.get('is_501c3', ''):
            is_501c3 = True

        org_name = entry.get('org_name', '')
        if re.search(r'[^a-z]acorn[^a-z]', " " + org_name + " ", re.IGNORECASE):
            logging.debug('solr_search.query skipping: ACORN in org_name')
            continue

        latstr = entry["latitude"]
        longstr = entry["longitude"]
        virtual = entry.get('virtual')
        self_directed = entry.get("self_directed")
        micro = entry.get("micro")
        volunteers_needed = entry.get("volunteersneeded")

        res = searchresult.SearchResult(url, title, snippet, location, item_id,
                                        base_url, volunteers_needed, virtual,
                                        self_directed, micro, categories, org_name,
                                        vetted, is_501c3)

        # TODO: escape?
        res.provider = entry["feed_providername"]
        if (res.provider == "myproj_servegov" and
                re.search(r'[^a-z]acorn[^a-z]', " " + result_content + " ", re.IGNORECASE)):
            # per-provider rule because case-insensitivity
            logging.info('solr_search.query skipping: ACORN in for myproj_servegov')
            continue

        res.orig_idx = i + 1
        res.latlong = ""
        res.distance = ''
        res.duration = ''
        if latstr and longstr:
            res.latlong = str(latstr) + "," + str(longstr)
            try:
                res.distance = str(calc_distance(float(args[api.PARAM_LAT]),
                                                 float(args[api.PARAM_LNG]),
                                                 float(latstr), float(longstr)))
            except:
                pass

        # res.event_date_range follows one of these two formats:
        #     <start_date>T<start_time> <end_date>T<end_time>
        #     <date>T<time>
        res.event_date_range = entry["event_date_range"]
        res.startdate = datetime.datetime.strptime("2000-01-01", "%Y-%m-%d")
        res.enddate = datetime.datetime.strptime("2038-01-01", "%Y-%m-%d")
        if not dumping and res.event_date_range:
            match = DATE_FORMAT_PATTERN.findall(res.event_date_range)
            if not match:
                logging.debug('solr_search.query skipping record'
                              ' %d: bad date range: %s for %s' %
                              (i, res.event_date_range, url))
                continue
            else:
                # first match is start date/time
                startdate = datetime.datetime.strptime(match[0], '%Y-%m-%dT%H:%M:%S')
                # last match is either end date/time or start date/time
                enddate = datetime.datetime.strptime(match[-1], '%Y-%m-%dT%H:%M:%S')
                # protect against absurd dates
                if startdate > res.startdate:
                    res.startdate = startdate
                if enddate < res.enddate:
                    res.enddate = enddate
                if res.startdate and res.enddate:
                    delta = res.enddate - res.startdate
                    res.duration = str(delta.days)

        for name in utils.unique_list(apiwriter.STANDARD_FIELDS + apiwriter.EXELIS_FIELDS +
                                      apiwriter.HOC_FIELDS + apiwriter.CALENDAR_FIELDS):
            name = name.lower()
            if len(name) >= 2 and not hasattr(res, name) or not getattr(res, name, None):
                value = entry.get(name, '')
                if not isinstance(value, list):
                    setattr(res, name, str(value))
                else:
                    setattr(res, name, '\t'.join(value))

        # posting.py currently has an authoritative list of fields in "argnames"
        # that are available to submitted events which may later appear in GBase
        # so with a few exceptions we want those same fields to become
        # attributes of our result object
        except_names = ["title", "description"]
        for name in posting.argnames:
            if name not in except_names and name.lower() in entry:
                # Solr field names are all lowercase.
                # TODO: fix list in posting.py so it matches solr's fieldnames.
                setattr(res, name, entry[name.lower()])

        result_set.results.append(res)
        if cache and res.item_id:
            key = RESULT_CACHE_KEY + res.item_id
            memcache.set(key, res, time=RESULT_CACHE_TIME)

    result_set.num_results = len(result_set.results)
    result_set.total_match = int(result["response"]["numFound"])
    result_set.merged_count = result_set.backend_count = result_set.estimated_results = result_set.total_match
    if group_query:
        cq = query_url.replace(fields_query, '').replace(group_query, '')
        result_set.backend_count = get_solr_count(cq, args)
        cq = query_url.replace(fields_query, '')
        result_set.merged_count = get_solr_count(cq, args)

    parse_end = time.time()
    result_set.parse_time = parse_end - parse_start

    return result_set
def __init__(self, initial_vars):
    super().__init__(initial_vars)
    self.need_items_table = True
    self.read_name_specific_defaults_file(super().__thisclass__.__name__)
    self.full_doit_order = utils.unique_list()
def add_result(self, result, result_set={}):
    """Add an item dict to the items array."""
    # result is an instance of SearchResult
    f1 = 1
    first_zip = 1
    item = {}
    for field_info in self.item_fields:
        name = field_info[0]
        if (result_set.is_hoc or result_set.is_rss) and name.lower() not in utils.unique_list(STANDARD_FIELDS + HOC_FIELDS):
            continue
        if result_set.is_exelis and name.lower() not in utils.unique_list(STANDARD_FIELDS + EXELIS_FIELDS):
            continue
        if result_set.is_cal and name.lower() not in CALENDAR_FIELDS:
            continue
        if (len(name) < 2) and result_set.is_hoc and result_set.is_cal:
            continue
        if (not result_set.is_hoc) and (not result_set.is_rss) and (not result_set.is_cal) and (not result_set.is_exelis) and (
                API_FIELD_NAMES_MAP.get(name, name) in (
                    "appropriateFors", "activityTypes", "categoryTags", "Distance",
                    "sponsoringOrganizationUrl", "affiliateOrganizationName",
                    "affiliateOrganizationUrl", "opportunityId", "opportunityType",
                    "registerType", "occurrenceId", "occurrenceDuration", "eventId",
                    "eventName", "frequencyURL", "frequency", "availabilityDays",
                    "audienceTags", "volunteerHubOrganizationUrl",
                    "volunteerHubOrganizationName", "volunteersNeeded",
                    "affiliateOrganizationID", "rsvpCount", "sexrestrictedto",
                    "scheduleType")):
            continue
        if (result_set.is_hoc or result_set.is_cal or result_set.is_exelis or result_set.is_rss) and (
                API_FIELD_NAMES_MAP.get(name, name) in ("addrname1", "contactNoneNeeded")):
            continue

        if not hasattr(result, name):
            name = name.lower()
        if not hasattr(result, name) and len(field_info) > 1:
            name = field_info[1]
        if not hasattr(result, name):
            name = name.lower()

        content = getattr(result, name, '')
        # print name, '=', content, '<br>'

        if name.lower() == "enddate":
            if custom_date_format(content) == 'Present':
                content = ''
        elif name == "description":
            if result_set.args.get('fulldesc', '') != '1':
                content = content[:300]
        elif name in ["eventrangestart", "eventrangeend"]:
            content = content.replace('T', ' ').strip('Z')

        if isinstance(content, basestring):
            content = content.strip()

        # handle lists
        if isinstance(content, basestring) and content.find('\t') > 0:
            item[API_FIELD_NAMES_MAP.get(name, name)] = content.split('\t')
        elif API_FIELD_NAMES_MAP.get(name, name) in ARRAY_FIELDS and not isinstance(content, list):
            if content:
                item[API_FIELD_NAMES_MAP.get(name, name)] = [content]
            else:
                item[API_FIELD_NAMES_MAP.get(name, name)] = []
        else:
            item[API_FIELD_NAMES_MAP.get(name, name)] = content

    self.items.append(item)
def fut(word):
    image_urls = unique_list(
        get_flickr_urls(word, "relevance") +
        get_flickr_urls(word, "interestingness-desc"))
    random.shuffle(image_urls)
    return image_urls
def guid_list(items_map):
    retVal = utils.unique_list()
    for install_def in list(items_map.values()):
        retVal.extend(list(filter(bool, install_def.guids)))
    return retVal
def __get_item_list_for_default_oses_by_category(self, item_category):
    retVal = utils.unique_list()
    for os_name in InstallItem._get_for_os:
        retVal.extend(self.__get_item_list_by_os_and_category(os_name, item_category))
    return retVal