def do_apropos(self, params):
    definitions = self.client_prog_inst.create_completion_list("define")
    index = self.client_prog_inst.create_completion_list("index")
    guids = self.client_prog_inst.create_completion_list("guid")
    definitions_results = utils.unique_list()
    index_results = utils.unique_list()
    guids_results = utils.unique_list()
    search_for = params.split()
    work_list = ((definitions, definitions_results), (index, index_results), (guids, guids_results))
    for param in search_for:
        for id_list, results in work_list:
            for identifier in id_list:
                found_it = re.search(param, identifier, flags=re.IGNORECASE)
                if found_it:
                    results.append(identifier)
    print("variables:")
    if definitions_results:
        for var in definitions_results:
            print("    ", var)
    else:
        print("    no matching variables were found")
    print("index items:")
    if index_results:
        for iid in index_results:
            print("    ", iid)
    else:
        print("    no matching iids were found")
    print("guids:")
    if guids_results:
        for guid in guids_results:
            iids_of_guids = self.client_prog_inst.items_table.get_iids_with_specific_detail_values("guid", guid)
            print("    ", guid, iids_of_guids)
    else:
        print("    no matching guids were found")
def do_common(self, params):
    iids = shlex.split(params)
    missing_iids = utils.unique_list()  # [iid in iids if iid not in ]
    for iid in iids:
        if iid not in self.client_prog_inst.install_definitions_index:
            missing_iids.append(iid)
    if missing_iids:
        print("Could not find in index:", ", ".join(missing_iids))
    else:
        all_needs = list()
        all_needed_by = list()
        for iid in iids:
            needs_list = utils.unique_list()
            self.client_prog_inst.needs(iid, needs_list)
            all_needs.append(needs_list)
            all_needed_by.append(self.client_prog_inst.needed_by(iid))
        needs_result = set(all_needs[0]).intersection(*all_needs)
        needed_by_result = set(all_needed_by[0]).intersection(*all_needed_by)
        if "__ALL_ITEMS_IID__" in needed_by_result:
            needed_by_result.remove("__ALL_ITEMS_IID__")
        if not needs_result:
            needs_result.add("no one")
        print("common needs:\n    ", ", ".join(needs_result))
        if not needed_by_result:
            needed_by_result.add("no one")
        print("common needed by:\n    ", ", ".join(needed_by_result))
def __init__(self):
    self.__original_install_items = utils.unique_list()
    self.__root_install_items = utils.unique_list()
    self.__update_install_items = utils.unique_list()
    self.__full_install_items = utils.unique_list()
    self.__orphan_install_items = utils.unique_list()
    self.__install_items_by_target_folder = defaultdict(utils.unique_list)
    self.__no_copy_items_by_sync_folder = defaultdict(utils.unique_list)
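# All of these snippets rely on a unique_list helper (utils.unique_list in some
# projects, a bare unique_list import in others): a container or function that keeps
# insertion order while dropping duplicates, and that also works as a zero-argument
# factory for defaultdict. The class below is a minimal sketch of that assumed
# behaviour, not the actual implementation used by any of these projects.
class unique_list(list):
    """List subclass that keeps insertion order but silently skips items it already holds."""

    def __init__(self, iterable=()):
        super().__init__()
        self.extend(iterable)

    def append(self, item):
        if item not in self:        # O(n) membership test; fine for the small lists used here
            super().append(item)

    def extend(self, iterable):
        for item in iterable:
            self.append(item)

# quick usage check of the sketch
ul = unique_list(["a", "b", "a"])
ul.append("b")
ul.extend(["c", "a"])
print(ul)                           # ['a', 'b', 'c'] - duplicates dropped, order preserved
from collections import defaultdict
groups = defaultdict(unique_list)   # also usable as a defaultdict factory, like utils.unique_list
groups["k"].append("x")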
def __init__(self, iid):
    self.__resolved_inherit = False
    self.__iid = iid
    self.__name = ""
    self.__guids = utils.unique_list()
    self.__remark = ""
    self.__description = ""
    self.__inherit_from = utils.unique_list()
    # reading for all platforms ('common') or for which specific platforms ('Mac', 'Win')?
    self.__install_for_os_stack = [InstallItem.os_names[0]]
    self.__items = defaultdict(InstallItem.create_items_section)
    self.__var_list = None
    self.__user_data = None
    self.__last_require_repo_rev = 0
def do_depend(self, params):
    if params:
        for param in shlex.split(params):
            if param not in self.client_prog_inst.install_definitions_index:
                print(text_with_color(param, 'green'), "not in index")
                continue
            needs_list = utils.unique_list()
            self.client_prog_inst.needs(param, needs_list)
            if not needs_list:
                needs_list = ("no one",)
            depend_text_list = list()
            for depend in needs_list:
                if depend.endswith("(missing)"):
                    depend_text_list.append(text_with_color(depend, 'red'))
                else:
                    depend_text_list.append(text_with_color(depend, 'yellow'))
            print(text_with_color(param, 'green'), "needs:\n    ", ", ".join(depend_text_list))
            needed_by_list = self.client_prog_inst.needed_by(param)
            if needed_by_list is None:
                print("could not get needed by list for", text_with_color(param, 'green'))
            else:
                if not needed_by_list:
                    needed_by_list = ("no one",)
                needed_by_list = [text_with_color(needed_by, 'yellow') for needed_by in needed_by_list]
                print(text_with_color(param, 'green'), "needed by:\n    ", ", ".join(needed_by_list))
    return False
def _put(self):
    try:
        self._validate_role(self.body)
    except ValidationException as exc:
        return self._respond(message=str(exc), status=400)

    response = self.db.get(self.db.current_user())
    try:
        roles = response.response['Item']['roles']
    except (KeyError, TypeError):
        return self._respond(message='Not Found', status=404)

    if self.path_parameters['name'] not in [r['name'] for r in roles]:
        return self._respond(message='Not Found', status=404)

    for value in self.body['values']:
        if not self._is_value_defined(value, response):
            return self._respond(message=f'Undefined value "{value}"', status=400)

    for role in roles:
        if role['name'] == self.path_parameters['name']:
            role.update({
                'name': self.body['name'],
                'values': self.body['values'],
                'aliases': unique_list(self.body.get('aliases', [])),
            })

    response = self.db.update(self.db.current_user(), {'roles': roles})
    return self._respond(message=response.message, status=response.status)
def get_reactions_list(self) -> List[str]:
    if self.is_simple_emoji_or_textual_reaction:
        return [self.text]
    elif self.is_many_reactions:
        return unique_list(find_emojis_in_str(self.text))
    elif self.is_custom_reaction:
        return [cast(str, extract_custom_reaction(self.text))]
    else:
        raise ValueError("Can't extract reaction")
def __init__(self, initial_vars) -> None:
    super().__init__(initial_vars)
    self.total_self_progress: int = 30000
    self.read_defaults_file(super().__thisclass__.__name__)
    self.action_type_to_progress_message = None
    self.__all_iids_by_target_folder = defaultdict(utils.unique_list)
    self.__no_copy_iids_by_sync_folder = defaultdict(utils.unique_list)
    self.auxiliary_iids = utils.unique_list()
    self.main_install_targets = list()
def __init__(self, initial_vars):
    super().__init__(initial_vars)
    self.total_self_progress = 1000
    self.need_items_table = True
    self.need_info_map_table = True
    self.read_name_specific_defaults_file(super().__thisclass__.__name__)
    self.action_type_to_progress_message = None
    self.__all_iids_by_target_folder = defaultdict(utils.unique_list)
    self.__no_copy_iids_by_sync_folder = defaultdict(utils.unique_list)
    self.auxiliary_iids = utils.unique_list()
    self.main_install_targets = list()
def compact_history():
    if hasattr(readline, "replace_history_item"):
        unique_history = utils.unique_list()
        for index in reversed(list(range(1, readline.get_current_history_length()))):
            hist_item = readline.get_history_item(index)
            if hist_item:  # some history items are None (usually at index 0)
                unique_history.append(hist_item)
        unique_history.reverse()
        for index in range(len(unique_history)):
            readline.replace_history_item(index + 1, unique_history[index])
        for index in reversed(list(range(len(unique_history) + 1, readline.get_current_history_length()))):
            readline.remove_history_item(index)
def repr_require_for_yaml(self):
    translate_detail_name = {'require_version': 'version',
                             'require_guid': 'guid',
                             'require_by': 'require_by'}
    retVal = defaultdict(dict)
    require_details = self.items_table.get_details_by_name_for_all_iids("require_%")
    for require_detail in require_details:
        item_dict = retVal[require_detail['owner_iid']]
        translated_name = translate_detail_name[require_detail['detail_name']]
        if translated_name not in item_dict:  # check the translated key, not the raw detail name, so values accumulate
            item_dict[translated_name] = utils.unique_list()
        item_dict[translated_name].append(require_detail['detail_value'])
    for item in retVal.values():
        for sub_item in item.values():
            sub_item.sort()
    return retVal
def calculate_full_doit_items_set(self, instlObj):
    """ Calculate the set of iids to install by starting with the root set and adding all dependencies.
        The initial list of iids should already be in self.root_doit_items.
        If no install item is found for an iid, the iid is added to the orphan set.
    """
    root_install_iids_translated = utils.unique_list()
    for root_IID in self.root_doit_items:
        # if root_IID is a guid, iids_from_guids will translate it to iids; otherwise it returns the IID itself
        iids_from_the_root_iid = iids_from_guids(instlObj.install_definitions_index, root_IID)
        for IID in iids_from_the_root_iid:
            if IID in instlObj.install_definitions_index:
                root_install_iids_translated.append(IID)
            else:
                self.orphan_doit_items.append(IID)
    self.full_doit_items = root_install_iids_translated
def find_cmd_tool(self, tool_to_find_var_name):
    """ Locate the path to a cmd.exe tool on Windows; if found, put the full path in the variable.
    :param tool_to_find_var_name: variable name holding the tool name or the full path to the tool
    :return: the path to the tool, or None if it was not found
    """
    tool_path = None
    if tool_to_find_var_name in var_stack:
        original_tool_value = var_stack.ResolveVarToStr(tool_to_find_var_name)

        # first try the variable, could be that the tool was already found
        if os.path.isfile(original_tool_value):
            tool_path = original_tool_value

        if tool_path is None:
            # next try to ask the system using the where command
            try:
                where_tool_path = subprocess.check_output("where " + original_tool_value).strip()
                where_tool_path = utils.unicodify(where_tool_path)
                if os.path.isfile(where_tool_path):
                    tool_path = where_tool_path
                    var_stack.set_var(tool_to_find_var_name, "find_cmd_tool").append(tool_path)
            except Exception:
                pass  # never mind, we'll try on our own

        if tool_path is None:
            win_paths = utils.unique_list()
            # try to find the tool in the PATH variable
            if "PATH" in os.environ:
                # remove newline characters that might lurk in the path (see tech support case 143589)
                adjusted_path = re.sub('[\r\n]', "?", utils.unicodify(os.environ["PATH"]))
                win_paths.extend(adjusted_path.split(";"))
            else:
                print("PATH was not found in environment variables")
            # also add some known locations in case the user's PATH variable was altered
            if "SystemRoot" in os.environ:
                system_root = utils.unicodify(os.environ["SystemRoot"])
                known_locations = (os.path.join(system_root, "System32"),
                                   os.path.join(system_root, "SysWOW64"))
                win_paths.extend(known_locations)
            for win_path in win_paths:
                tool_path = os.path.join(win_path, original_tool_value)
                if os.path.isfile(tool_path):
                    var_stack.set_var(tool_to_find_var_name, "find_cmd_tool").append(tool_path)
                    break
            else:  # break was not called, tool was not found
                tool_path = None
    return tool_path
def parse_senses(results):
    senses = []
    exs = []
    for r in results:
        if r.get("senses"):
            for s in r["senses"]:
                s["part_of_speech"] = r.get("part_of_speech")
                s["definition"] = s.get("definition") or ''
                if s.get("definition") and type(s["definition"]) != list:
                    s["definition"] = [s["definition"]]
                else:
                    senses.append(s)
                examples = s.get("examples") or []
                for example in examples:
                    if example and example.get("text"):
                        exs.append(example["text"])
    return senses, unique_list(exs)
def accumulate_unique_actions(self, action_type, iid_list):
    """ accumulate action_type actions from iid_list, eliminating duplicates """
    unique_actions = utils.unique_list()  # unique_list will eliminate identical actions while keeping the order
    for IID in iid_list:
        with self.install_definitions_index[IID].push_var_stack_scope() as installi:
            action_var_name = "iid_action_list_" + action_type
            item_actions = var_stack.ResolveVarToList(action_var_name, default=[])
            num_unique_actions = 0
            for an_action in item_actions:
                len_before = len(unique_actions)
                unique_actions.append(an_action)
                len_after = len(unique_actions)
                if len_before < len_after:  # add progress only for the first occurrence of the same action
                    num_unique_actions += 1
                    action_description = self.action_type_to_progress_message[action_type]
                    if num_unique_actions > 1:
                        action_description = " ".join((action_description, str(num_unique_actions)))
                    unique_actions.append(
                        self.platform_helper.progress("{installi.name} {action_description}".format(**locals())))
    self.batch_accum += unique_actions
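# A small demonstration of the len-before/len-after idiom used in accumulate_unique_actions
# above: appending to a unique_list and comparing lengths tells you whether the item was
# seen for the first time. This reuses the unique_list sketch given earlier in this listing
# (an assumption, not the real utils.unique_list), and stand-in action strings.
actions = unique_list()
for an_action in ["mkdir x", "copy a b", "mkdir x"]:
    len_before = len(actions)
    actions.append(an_action)          # duplicates are silently dropped
    if len(actions) > len_before:      # length grew, so this action is new
        print("first occurrence of:", an_action)
# prints "mkdir x" and "copy a b" once each; the repeated "mkdir x" is skipped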
def __init__(self, data, order_by=None, visible_columns=(), column_order=()):
    """Create a new table instance with the iterable ``data``.

    If ``order_by`` is specified, the data will be sorted accordingly.

    Note that unlike a ``Form``, tables are always bound to data. Also unlike a
    form, the ``columns`` attribute is read-only and returns ``BoundColumn``
    wrappers, similar to the ``BoundField``s you get when iterating over a form.
    This is because the table iterator already yields rows, and we need an
    attribute via which to expose the (visible) set of (bound) columns -
    ``Table.columns`` is simply the perfect fit for this. Instead,
    ``base_columns`` is copied to table instances, so modifying that will not
    touch the class-wide column list.
    """
    self._data = data
    self._snapshot = None  # will store output dataset (ordered...)
    self._rows = Rows(self)
    self._columns = Columns(self)
    self.order_by = order_by

    # Make a copy so that modifying this will not touch the class
    # definition. Note that this is different from forms, where the
    # copy is made available in a ``fields`` attribute. See the
    # ``Table`` class docstring for more information.
    self.base_columns = copy.deepcopy(self.base_columns)
    keys_order = list(column_order) + list(self.base_columns.keys())
    self.base_columns.keyOrder = unique_list(keys_order)

    if visible_columns:
        # set visibility only if visible_columns are specified
        for fname in self.base_columns.keys():
            if fname not in visible_columns:
                self.base_columns[fname].visible = False
    if self.always_visible_cols:
        for fname in self.base_columns.keys():
            if fname in self.always_visible_cols:
                self.base_columns[fname].visible = True
def _post(self):
    try:
        self._validate_role(self.body)
    except ValidationException as exc:
        return self._respond(message=str(exc), status=400)

    response = self.db.get(self.db.current_user())
    try:
        roles = response.response['Item']['roles']
    except (KeyError, TypeError):
        roles = []

    if self.body['name'] in [r['name'] for r in roles]:
        return self._respond(message='Resource already exists', status=400)

    for value in self.body['values']:
        if not self._is_value_defined(value, response):
            return self._respond(message=f'Undefined value "{value}"', status=400)

    roles.append({
        'name': self.body['name'],
        'values': self.body['values'],
        'aliases': unique_list(self.body.get('aliases', [])),
    })
    response = self.db.update(self.db.current_user(), {'roles': roles})
    return self._respond(message=response.message, status=response.status)
def keys(self):
    the_keys = utils.unique_list()
    for a_var_list in reversed(self._ConfigVarList_objs):
        the_keys.extend(list(a_var_list.keys()))
    return list(the_keys)
def __init__(self):
    self.root_doit_items = utils.unique_list()
    self.full_doit_items = utils.unique_list()
    self.orphan_doit_items = utils.unique_list()
    self.doit_items_by_target_folder = defaultdict(utils.unique_list)
    self.no_copy_items_by_sync_folder = defaultdict(utils.unique_list)
def friends_command(update, context):
    '''
    friends => Shows the contact (Telegram @) of every person the user has already connected with.
    '''
    start_t = timer()  # used to measure how long this function takes to complete

    # makes it easier to reference this user below
    myself = update.effective_user.id
    my_data = db.get_user_by_id(myself)

    context.user_data['connections'] = my_data['connections']
    if len(context.user_data['connections']) == 0:
        # this user has no connections yet
        response = "Você ainda não possui nenhuma conexão!\n"
        response += "Que tal usar o comando /show para conhecer alguém novo?"
        update.message.reply_text(response)
        return ConversationHandler.END

    # reaching this point means the user has connections
    connections_set = unique_list(context.user_data['connections'])
    # fix the stored connections in case there are duplicates
    if len(connections_set) < len(context.user_data['connections']):
        # there are duplicates in the original list
        context.user_data['connections'] = list(connections_set)
        db.update_user_by_id(myself, {'connections': context.user_data['connections']})

    bottom_msg = "Utilize esses botões para navegar entre as páginas:\n\n"

    pages_text_list = friends_paginator(connections_set)
    context.user_data['friend_pages'] = pages_text_list
    button_pairs = make_buttons(0, len(pages_text_list) - 1)

    response = pages_text_list[0]
    if len(button_pairs) != 0:
        response += bottom_msg

    # button pairs consist of (button_text, callback_text)
    keyboard = [[
        InlineKeyboardButton(text, callback_data=callback)
        for text, callback in button_pairs
    ]]

    end_t = timer()
    ellapsed_t = end_t - start_t
    db.register_action('friends_command', myself, additional_data={'ellapsed_time': ellapsed_t})

    update.message.reply_text(response, reply_markup=InlineKeyboardMarkup(keyboard))
    return CHOOSE_PAGE
def original_names_from_wtars_names(self, original_list):
    replaced_list = utils.unique_list()
    replaced_list.extend([self.original_name_from_wtar_name(file_name) for file_name in original_list])
    return replaced_list
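# Hedged illustration of why original_names_from_wtars_names deduplicates its output:
# several wrapped-archive part names can map back to the same original name. The
# ".wtar" / ".wtar.aa" naming scheme and this stand-in for original_name_from_wtar_name
# are assumptions for the example, not the project's actual implementation; it also
# reuses the unique_list sketch from earlier in this listing.
import re

def original_name_from_wtar_name_sketch(file_name):
    # strip a trailing ".wtar" or a split-part suffix such as ".wtar.aa" (assumed scheme)
    return re.sub(r"\.wtar(\.[a-z][a-z])?$", "", file_name)

parts = ["plugin.bundle.wtar.aa", "plugin.bundle.wtar.ab", "readme.txt"]
originals = unique_list(original_name_from_wtar_name_sketch(p) for p in parts)
print(list(originals))  # ['plugin.bundle', 'readme.txt'] - both archive parts collapse to one entry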
def form_solr_query(args):
    solr_query = ''

    api_key = None
    if api.PARAM_KEY in args:
        api_key = args[api.PARAM_KEY]
        logging.info('api_key = %s' % api_key)

    # args fix up
    if api.PARAM_START not in args:
        args[api.PARAM_START] = 1
    if api.PARAM_SORT not in args:
        args[api.PARAM_SORT] = "score"

    # Generate geo search parameters
    # TODO: formalize these constants
    # this is near the middle of the continental US
    lat = '37'
    lng = '-95'
    max_dist = 12400
    if args.get(api.PARAM_LAT, None) and args.get(api.PARAM_LNG, None):
        lat = args[api.PARAM_LAT]
        lng = args[api.PARAM_LNG]
        if api.PARAM_VOL_DIST not in args or args[api.PARAM_VOL_DIST] == "":
            args[api.PARAM_VOL_DIST] = DEFAULT_VOL_DIST
        max_dist = args[api.PARAM_VOL_DIST] = int(str(args[api.PARAM_VOL_DIST]))
        if args[api.PARAM_VOL_DIST] < 1:
            args[api.PARAM_VOL_DIST] = DEFAULT_VOL_DIST
        max_dist = float(args[api.PARAM_VOL_DIST])

    if args.get(api.PARAM_INVITATIONCODE, ''):
        max_dist = 20030

    global GEO_GLOBAL
    geo_params = ('{!geofilt}&pt=%s,%s&sfield=latlong&d=%s&d1=0'
                  % (str(lat), str(lng), str(max_dist * 1.609)))
    geo_params += "&bf=recip(geodist(),1,150,10)"
    GEO_GLOBAL = geo_params

    if (args['is_report'] or
            (args.get(api.PARAM_TYPE) and args.get(api.PARAM_TYPE, None) != "all")):
        geo_params = ""
        if args['is_report']:
            GEO_GLOBAL = ''

    # Run our keyword through our categories dictionary to see if we need to adjust our keyword param
    if api.PARAM_CATEGORY in args:
        for key, val in categories.CATEGORIES.iteritems():
            if str(args[api.PARAM_CATEGORY]) == val:
                args[api.PARAM_CATEGORY] = str(key)

    # keyword
    original_query = ''
    query_is_empty = False
    if (api.PARAM_Q in args and args[api.PARAM_Q] != ""):
        original_query = args[api.PARAM_Q]
        qwords = args[api.PARAM_Q].split(" ")
        for qi, qw in enumerate(qwords):
            # it is common practice to use a substr of a url, eg. volunteermatch
            # here we transform that to http://*volunteermatch*
            if qw.find("detailurl:") >= 0 and qw.find("*") < 0:
                ar = qw.split(":")
                if len(ar) > 1:
                    ar[1] = "http*" + ar[1] + "*"
                    qw = ":".join(ar)
                    qwords[qi] = qw
        args[api.PARAM_Q] = ' '.join(qwords)

        # a category in &q means expand to specific terms, as opposed to the
        # solr field 'category' which atm may only be 'vetted'
        args[api.PARAM_Q] = apply_category_query(args[api.PARAM_Q])

        if api.PARAM_CATEGORY in args:
            args[api.PARAM_Q] += (" AND " + args[api.PARAM_CATEGORY])

        solr_query += rewrite_query('*:* AND ' + args[api.PARAM_Q], api_key)
        ga.track("API", args.get(api.PARAM_KEY, 'UI'), args[api.PARAM_Q])
    elif api.PARAM_CATEGORY in args:
        solr_query += rewrite_query('*:* AND ' + args[api.PARAM_CATEGORY], api_key)
        ga.track("API", args.get(api.PARAM_KEY, 'UI'), args[api.PARAM_CATEGORY])
    else:
        # Query is empty, search for anything at all.
        query_is_empty = True
        solr_query += rewrite_query('*:*', api_key)
        ga.track("API", args.get(api.PARAM_KEY, 'UI'), '*:*')

    # geo params go in first
    global KEYWORD_GLOBAL, STATEWIDE_GLOBAL, NATIONWIDE_GLOBAL
    KEYWORD_GLOBAL = urllib.quote_plus(solr_query)
    STATEWIDE_GLOBAL, NATIONWIDE_GLOBAL = geocode.get_statewide(lat, lng)

    solr_query = urllib.quote_plus(solr_query)

    if api.PARAM_TYPE in args and args[api.PARAM_TYPE] != "all":
        # Type: these map to the tabs on the search results page
        # quote plus
        if args[api.PARAM_TYPE] == "self_directed":
            solr_query += urllib.quote_plus(" AND self_directed:true")
        elif args[api.PARAM_TYPE] == "nationwide":
            nationwide_param = args.get('nationwide', '')
            if nationwide_param:
                solr_query += urllib.quote_plus(" AND country:" + nationwide_param)
            solr_query += urllib.quote_plus(" AND micro:false AND self_directed:false")
        elif args[api.PARAM_TYPE] == "statewide":
            statewide_param = args.get('statewide', '')
            if statewide_param:
                solr_query += urllib.quote_plus(" AND state:" + statewide_param)
            else:
                solr_query += urllib.quote_plus(" AND (statewide:" + STATEWIDE_GLOBAL +
                                                " OR nationwide:" + NATIONWIDE_GLOBAL + ")")
            solr_query += urllib.quote_plus(" AND micro:false AND self_directed:false")
        elif args[api.PARAM_TYPE] == "virtual":
            solr_query += urllib.quote_plus(" AND virtual:true AND micro:false AND self_directed:false")
        elif args[api.PARAM_TYPE] == "micro":
            solr_query += urllib.quote_plus(" AND micro:true")
    else:
        # this keeps the non-geo counts out of the refine by counts
        fq = '&fq='
        fq += urllib.quote('self_directed:false AND virtual:false AND micro:false')
        solr_query += fq

    global FULL_QUERY_GLOBAL
    FULL_QUERY_GLOBAL = solr_query

    # Source
    global PROVIDER_GLOBAL
    if api.PARAM_SOURCE in args and args[api.PARAM_SOURCE] != "all":
        PROVIDER_GLOBAL = urllib.quote_plus(" AND provider_proper_name:(" + args[api.PARAM_SOURCE] + ")")
        solr_query += PROVIDER_GLOBAL
    else:
        PROVIDER_GLOBAL = ""

    # for ad campaigns
    if api.PARAM_CAMPAIGN_ID in args:
        # we need to exclude the opted out opportunities
        # they can be tagged as opt_out_all_campaigns
        # or opt_out_campaign_XXX where XXX is the campaign ID.
        exclusion = '!categories:%s !categories:%s' % (
            'optout_all_campaigns',
            'optout_campaign_' + args[api.PARAM_CAMPAIGN_ID])
        # TODO: campaign_ids are per-campaign, but opportunities
        # might prefer to opt out of an entire sponsor.
        # should probably add a 'sponsor_id' to the spreadsheet,
        # and have optout_sponsor_XXX as well.
        solr_query += exclusion

    # set the solr instance we need to use if not given as an arg
    global BACKEND_GLOBAL
    BACKEND_GLOBAL, args = get_solr_backend(args)

    solr_query += apply_boosts(args, original_query)
    solr_query += apply_filter_query(api_key, args)

    group_query = ''
    if args.get(api.PARAM_MERGE, None) == '2':
        group_query = "&group=true&group.field=aggregatefield&group.main=true"
    elif args.get(api.PARAM_MERGE, None) == '3':
        group_query = "&group=true&group.field=opportunityid&group.main=true"
    elif args.get(api.PARAM_MERGE, None) == '4':
        group_query = "&group=true&group.field=dateopportunityidgroup&group.main=true&group.limit=7"
    solr_query += group_query

    # add the geo params
    solr_query += '&fq=' + geo_params

    # add the field list
    fields_query = '&fl='
    if api.PARAM_OUTPUT not in args:
        fields_query += ','.join(api.DEFAULT_OUTPUT_FIELDS)
    else:
        if args[api.PARAM_OUTPUT] in api.FIELDS_BY_OUTPUT_TYPE:
            fields_query += ','.join(utils.unique_list(api.DEFAULT_OUTPUT_FIELDS +
                                                       api.FIELDS_BY_OUTPUT_TYPE[args[api.PARAM_OUTPUT]]))
        else:
            fields_query += '*'

    # TODO: we were getting "URL too long" errors
    fields_query = '&fl=*'

    solr_query += fields_query
    return solr_query, group_query, fields_query
def query(query_url, group_query, fields_query, args, cache, dumping=False):
    """run the actual SOLR query (no filtering or sorting)."""
    logging.debug("Query URL: " + query_url + '&debugQuery=on')
    result_set = searchresult.SearchResultSet(urllib.unquote(query_url), query_url, [])
    result_set.query_url = query_url
    result_set.args = args
    result_set.fetch_time = 0
    result_set.parse_time = 0

    fetch_start = time.time()
    status_code = 999
    ui_query_url = query_url

    api_key = args.get(api.PARAM_KEY, 'UI')
    if api_key == 'UI':
        need_facet_counts = True
    else:
        need_facet_counts = False

    if api_key == 'UI':
        # For UI searches make two queries: one grouped by opportunityid to retrieve the VO IDs,
        # and a second one to retrieve the dates. The reason is that, because of occurrences,
        # pagination cannot be managed solely by rows.
        facetOppsQuery = re.sub(
            'fl=([*,a-z])',
            'fl=opportunityid,feed_providername,event_date_range,title,description,detailurl,latitude,longitude,categorytags&group=true&group.field=opportunityid&group.main=true&group.format=simple',
            ui_query_url)
        try:
            logging.info("calling SOLR for facetOppsQuery: " + facetOppsQuery)
            facetOppsQuery += '&r=' + str(random.random())
            # fetch_result = urlfetch.fetch(facetOppsQuery, deadline=api.CONST_MAX_FETCH_DEADLINE, headers={"accept-encoding": "gzip"},)
            fetch_result = urlfetch.fetch(facetOppsQuery, deadline=api.CONST_MAX_FETCH_DEADLINE)
            logging.info("calling SOLR for facetOppsQuery headers: %s " %
                         str(fetch_result.header_msg.getheaders('content-encoding')))
            status_code = fetch_result.status_code
            # unzip response if it is compressed
            if re.search('gzip', str(fetch_result.header_msg.getheaders('content-encoding'))) and status_code == 200:
                gzip_stream = StringIO(fetch_result.content)
                gzip_file = gzip.GzipFile(fileobj=gzip_stream)
                result_content = gzip_file.read()
            else:
                result_content = fetch_result.content
            result_content = re.sub(r';;', ',', result_content)
            result = simplejson.loads(result_content)
        except:
            # can get a response too large error here
            if status_code == 999:
                logging.warning('solr_search.query error 999 %s' % str(status_code))
            else:
                logging.info('solr_search.query responded %s' % str(status_code))

        doc_list = result["response"]["docs"]
        # logging.info('facetOppsQuery result' + str(doc_list))
        opportunityList = list()  # empty list
        for i, entry in enumerate(doc_list):
            opportunityList.append(entry["opportunityid"])
            # logging.info('opportunityList i=' + str(i) + ": v=" + str(entry))
        opportunityResults = 'opportunityid:(' + '+OR+'.join(opportunityList) + ')'
        logging.info('opportunityList =' + opportunityResults)

        ui_query_url = re.sub('rows=([0-9]+)', 'rows=1000', ui_query_url)
        ui_query_url = re.sub('start=([0-9]+)', 'start=0', ui_query_url)
        ui_query_url = re.sub(
            'fl=([*,a-z])',
            'fl=id,feed_providername,event_date_range,title,description,detailurl,latitude,longitude',
            ui_query_url)
        ui_query_url = ui_query_url.replace('&q=', '&q=' + opportunityResults + '+AND+')

    try:
        logging.info("calling SOLR: " + ui_query_url)
        ui_query_url += '&r=' + str(random.random())
        fetch_result = urlfetch.fetch(ui_query_url, deadline=api.CONST_MAX_FETCH_DEADLINE,
                                      headers={"accept-encoding": "gzip"})
        # fetch_result = urlfetch.fetch(ui_query_url, deadline=api.CONST_MAX_FETCH_DEADLINE,)
        logging.info("calling SOLR headers: %s " %
                     str(fetch_result.header_msg.getheaders('content-encoding')))
        status_code = fetch_result.status_code
        # unzip response if it is compressed
        if re.search('gzip', str(fetch_result.header_msg.getheaders('content-encoding'))) and status_code == 200:
            gzip_stream = StringIO(fetch_result.content)
            gzip_file = gzip.GzipFile(fileobj=gzip_stream)
            result_content = gzip_file.read()
        else:
            result_content = fetch_result.content
        result_content = re.sub(r';;', ',', result_content)
        result = simplejson.loads(result_content)
    except:
        # can get a response too large error here
        if status_code == 999:
            logging.warning('solr_search.query error')
        else:
            logging.info('solr_search.query responded %s' % str(status_code))

    fetch_end = time.time()
    result_set.fetch_time = fetch_end - fetch_start
    if status_code != 200:
        return result_set
    # result_content = fetch_result.content

    parse_start = time.time()
    # undo comma encoding -- see datahub/footprint_lib.py
    # result_content = re.sub(r';;', ',', result_content)
    # result = simplejson.loads(result_content)

    all_facets = None
    if need_facet_counts:
        all_facets = get_geo_counts(args, api_key)

    if not all_facets or "facet_counts" not in all_facets:
        result_set.facet_counts = None
    else:
        facet_counts = dict()
        ks = "self_directed:false AND virtual:false AND micro:false"
        if not args['is_report'] and not args.get(api.PARAM_INVITATIONCODE, None):
            ks += " AND -statewide:[* TO *] AND -nationwide:[* TO *]"
        facet_counts["all"] = int(all_facets["facet_counts"]["facet_queries"][ks])
        facet_counts.update(get_type_counts(args, api_key))
        count = 0
        if api.PARAM_TYPE in args:
            if args[api.PARAM_TYPE] == "statewide":
                count = facet_counts["statewide"]
            elif args[api.PARAM_TYPE] == "virtual":
                count = facet_counts["virtual"]
            elif args[api.PARAM_TYPE] == "self_directed":
                count = facet_counts["self_directed"]
            elif args[api.PARAM_TYPE] == "micro":
                count = facet_counts["micro"]
            else:
                count = facet_counts["all"]
        facet_counts["count"] = count
        result_set.facet_counts = facet_counts

    facets = get_facet_counts(api_key, args)
    result_set.categories = facets['category_fields']
    result_set.providers = facets['provider_fields']

    doc_list = result["response"]["docs"]

    # process json doc list
    for i, entry in enumerate(doc_list):
        if "detailurl" not in entry:
            # URL is required
            latstr = entry["latitude"]
            longstr = entry["longitude"]
            if latstr and longstr and latstr != "" and longstr != "":
                entry["detailurl"] = "http://maps.google.com/maps?q=" + str(latstr) + "," + str(longstr)
            else:
                logging.info('solr_search.query skipping SOLR record'
                             ' %d: detailurl is missing...' % i)
                continue

        url = entry["detailurl"]
        # ID is the 'stable id' of the item generated by base.
        # Note that this is not the base url expressed as the Atom id element.
        item_id = entry["id"]
        # Base URL is the url of the item in base. For Solr we just use the ID hash
        base_url = item_id
        snippet = entry.get('description', '')
        title = entry.get('title', '')
        location = entry.get('location_string', '')

        categories = entry.get('categories', '')
        if type(categories).__name__ != 'list':
            try:
                categories = categories.split(',')
            except:
                categories = []

        vetted = False
        if 'Vetted' in categories:
            vetted = True

        is_501c3 = False
        if entry.get('is_501c3', ''):
            is_501c3 = True

        org_name = entry.get('org_name', '')
        if re.search(r'[^a-z]acorn[^a-z]', " " + org_name + " ", re.IGNORECASE):
            logging.debug('solr_search.query skipping: ACORN in org_name')
            continue

        latstr = entry["latitude"]
        longstr = entry["longitude"]
        virtual = entry.get('virtual')
        self_directed = entry.get("self_directed")
        micro = entry.get("micro")
        volunteers_needed = entry.get("volunteersneeded")

        res = searchresult.SearchResult(url, title, snippet, location, item_id,
                                        base_url, volunteers_needed, virtual,
                                        self_directed, micro, categories, org_name,
                                        vetted, is_501c3)

        # TODO: escape?
        res.provider = entry["feed_providername"]
        if (res.provider == "myproj_servegov" and
                re.search(r'[^a-z]acorn[^a-z]', " " + result_content + " ", re.IGNORECASE)):
            # per-provider rule because case-insensitivity
            logging.info('solr_search.query skipping: ACORN in for myproj_servegov')
            continue

        res.orig_idx = i + 1
        res.latlong = ""
        res.distance = ''
        res.duration = ''
        if latstr and longstr:
            res.latlong = str(latstr) + "," + str(longstr)
            try:
                res.distance = str(calc_distance(float(args[api.PARAM_LAT]),
                                                 float(args[api.PARAM_LNG]),
                                                 float(latstr), float(longstr)))
            except:
                pass

        # res.event_date_range follows one of these two formats:
        #     <start_date>T<start_time> <end_date>T<end_time>
        #     <date>T<time>
        res.event_date_range = entry["event_date_range"]
        res.startdate = datetime.datetime.strptime("2000-01-01", "%Y-%m-%d")
        res.enddate = datetime.datetime.strptime("2038-01-01", "%Y-%m-%d")
        if not dumping and res.event_date_range:
            match = DATE_FORMAT_PATTERN.findall(res.event_date_range)
            if not match:
                logging.debug('solr_search.query skipping record'
                              ' %d: bad date range: %s for %s' %
                              (i, res.event_date_range, url))
                continue
            else:
                # first match is start date/time
                startdate = datetime.datetime.strptime(match[0], '%Y-%m-%dT%H:%M:%S')
                # last match is either end date/time or start date/time
                enddate = datetime.datetime.strptime(match[-1], '%Y-%m-%dT%H:%M:%S')
                # protect against absurd dates
                if startdate > res.startdate:
                    res.startdate = startdate
                if enddate < res.enddate:
                    res.enddate = enddate
                if res.startdate and res.enddate:
                    delta = res.enddate - res.startdate
                    res.duration = str(delta.days)

        for name in utils.unique_list(apiwriter.STANDARD_FIELDS + apiwriter.EXELIS_FIELDS +
                                      apiwriter.HOC_FIELDS + apiwriter.CALENDAR_FIELDS):
            name = name.lower()
            if len(name) >= 2 and not hasattr(res, name) or not getattr(res, name, None):
                value = entry.get(name, '')
                if not isinstance(value, list):
                    setattr(res, name, str(value))
                else:
                    setattr(res, name, '\t'.join(value))

        # posting.py currently has an authoritative list of fields in "argnames"
        # that are available to submitted events which may later appear in GBase
        # so with a few exceptions we want those same fields to become
        # attributes of our result object
        except_names = ["title", "description"]
        for name in posting.argnames:
            if name not in except_names and name.lower() in entry:
                # Solr field names are all lowercase.
                # TODO: fix list in posting.py so it matches solr's fieldnames.
                setattr(res, name, entry[name.lower()])

        result_set.results.append(res)
        if cache and res.item_id:
            key = RESULT_CACHE_KEY + res.item_id
            memcache.set(key, res, time=RESULT_CACHE_TIME)

    result_set.num_results = len(result_set.results)
    result_set.total_match = int(result["response"]["numFound"])
    result_set.merged_count = result_set.backend_count = result_set.estimated_results = result_set.total_match
    if group_query:
        cq = query_url.replace(fields_query, '').replace(group_query, '')
        result_set.backend_count = get_solr_count(cq, args)
        cq = query_url.replace(fields_query, '')
        result_set.merged_count = get_solr_count(cq, args)

    parse_end = time.time()
    result_set.parse_time = parse_end - parse_start

    return result_set
def __init__(self, initial_vars):
    super().__init__(initial_vars)
    self.need_items_table = True
    self.read_name_specific_defaults_file(super().__thisclass__.__name__)
    self.full_doit_order = utils.unique_list()
def add_result(self, result, result_set={}):
    """Add an item dict to the items array."""
    # result is an instance of SearchResult
    f1 = 1
    first_zip = 1
    item = {}
    for field_info in self.item_fields:
        name = field_info[0]
        if (result_set.is_hoc or result_set.is_rss) and name.lower() not in utils.unique_list(STANDARD_FIELDS + HOC_FIELDS):
            continue
        if result_set.is_exelis and name.lower() not in utils.unique_list(STANDARD_FIELDS + EXELIS_FIELDS):
            continue
        if result_set.is_cal and name.lower() not in CALENDAR_FIELDS:
            continue
        if (len(name) < 2) and result_set.is_hoc and result_set.is_cal:
            continue
        if (not result_set.is_hoc) and (not result_set.is_rss) and (not result_set.is_cal) and (not result_set.is_exelis) and (
                API_FIELD_NAMES_MAP.get(name, name) in (
                    "appropriateFors", "activityTypes", "categoryTags", "Distance",
                    "sponsoringOrganizationUrl", "affiliateOrganizationName",
                    "affiliateOrganizationUrl", "opportunityId", "opportunityType",
                    "registerType", "occurrenceId", "occurrenceDuration", "eventId",
                    "eventName", "frequencyURL", "frequency", "availabilityDays",
                    "audienceTags", "volunteerHubOrganizationUrl",
                    "volunteerHubOrganizationName", "volunteersNeeded",
                    "affiliateOrganizationID", "rsvpCount", "sexrestrictedto",
                    "scheduleType")):
            continue
        if (result_set.is_hoc or result_set.is_cal or result_set.is_exelis or result_set.is_rss) and (
                API_FIELD_NAMES_MAP.get(name, name) in ("addrname1", "contactNoneNeeded")):
            continue

        if not hasattr(result, name):
            name = name.lower()
        if not hasattr(result, name) and len(field_info) > 1:
            name = field_info[1]
        if not hasattr(result, name):
            name = name.lower()

        content = getattr(result, name, '')
        # print name, '=', content, '<br>'

        if name.lower() == "enddate":
            if custom_date_format(content) == 'Present':
                content = ''
        elif name == "description":
            if result_set.args.get('fulldesc', '') != '1':
                content = content[:300]
        elif name in ["eventrangestart", "eventrangeend"]:
            content = content.replace('T', ' ').strip('Z')

        if isinstance(content, basestring):
            content = content.strip()

        # handle lists
        if isinstance(content, basestring) and content.find('\t') > 0:
            item[API_FIELD_NAMES_MAP.get(name, name)] = content.split('\t')
        elif API_FIELD_NAMES_MAP.get(name, name) in ARRAY_FIELDS and not isinstance(content, list):
            if content:
                item[API_FIELD_NAMES_MAP.get(name, name)] = [content]
            else:
                item[API_FIELD_NAMES_MAP.get(name, name)] = []
        else:
            item[API_FIELD_NAMES_MAP.get(name, name)] = content

    self.items.append(item)
def fut(word):
    image_urls = unique_list(
        get_flickr_urls(word, "relevance") +
        get_flickr_urls(word, "interestingness-desc"))
    random.shuffle(image_urls)
    return image_urls
def guid_list(items_map):
    retVal = utils.unique_list()
    for install_def in list(items_map.values()):
        retVal.extend(list(filter(bool, install_def.guids)))
    return retVal
def __get_item_list_for_default_oses_by_category(self, item_category):
    retVal = utils.unique_list()
    for os_name in InstallItem._get_for_os:
        retVal.extend(self.__get_item_list_by_os_and_category(os_name, item_category))
    return retVal