def search(url: str,
           query: dict,
           data: typing.Union[pd.DataFrame, str, d3m_ds.Dataset, None] = None,
           send_data: bool = True,
           max_return_docs: int = 20,
           return_named_entity: bool = False) -> typing.List[Dataset]:
    """Search the ISI datamart for datasets matching a query.

    Follow the API defined by
    https://datadrivendiscovery.org/wiki/display/work/Python+API

    Args:
        url: str - the datamart server (for ISI's datamart it is meaningless,
            just a flag; anything not starting with SEARCH_URL returns []).
        query: JSON object describing the query
            (https://datadrivendiscovery.org/wiki/display/work/Query+results+schema)
        data: the data you are trying to augment. It can be provided as one of:
            - a pandas.DataFrame object
            - a D3M Dataset object
            - the path to a D3M datasetDoc.json file
            - the path to a CSV file
        send_data: (for ISI's datamart it is meaningless)
        max_return_docs: maximum number of Elasticsearch hits per query.
        return_named_entity: forwarded to the augmenter's query.

    Returns:
        a list of datamart.Dataset objects
    """
    # Only respond to queries addressed to this datamart's search endpoint.
    if not url.startswith(SEARCH_URL):
        return []

    loaded_data = DataLoader.load_data(data)
    augmenter = Augment(es_index=PRODUCTION_ES_INDEX)

    es_results = []
    if (query and ('required_variables' in query)) or (loaded_data is None):
        # If "required_variables" exists, or there is no data to inspect,
        # issue the query exactly as given.
        es_results = augmenter.query_by_json(
            query, loaded_data,
            size=max_return_docs,
            return_named_entity=return_named_entity) or []
    else:
        # No "required_variables" in the query JSON, but the dataset exists:
        # try each queryable column as "required_variables" and concatenate
        # the results, deduplicating by Elasticsearch document id.
        query = query or {}
        seen_ids = set()
        for col in loaded_data:
            if not Utils.is_column_able_to_query(loaded_data[col]):
                continue
            query['required_variables'] = [{
                "type": "dataframe_columns",
                "names": [col]
            }]
            cur_results = augmenter.query_by_json(
                query, loaded_data,
                size=max_return_docs,
                return_named_entity=return_named_entity)
            for res in cur_results or []:
                # TODO: how about the score ??
                if res['_id'] not in seen_ids:
                    seen_ids.add(res['_id'])
                    es_results.append(res)

    return [Dataset(es_result, original_data=loaded_data, query_json=query)
            for es_result in es_results]
def search(query: dict,
           data: typing.Union[pd.DataFrame, str, d3m_ds.Dataset, None] = None
           ) -> typing.List[Dataset]:
    """Search the default datamart index for datasets matching a query.

    Follow the API defined by
    https://datadrivendiscovery.org/wiki/display/work/Python+API

    Args:
        query: JSON object describing the query
            (https://datadrivendiscovery.org/wiki/display/work/Query+results+schema)
        data: the data you are trying to augment. It can be provided as one of:
            - a pandas.DataFrame object
            - a D3M Dataset object
            - the path to a D3M datasetDoc.json file
            - the path to a CSV file

    Returns:
        a list of datamart.Dataset objects.
    """
    loaded_data = DataLoader.load_data(data)
    augmenter = Augment(es_index=DEFAULT_ES)

    if not (query and ('required_variables' in query)) and (loaded_data is not None):
        # No "required_variables" supplied but data is available: mark every
        # queryable column as a required variable in a single query.
        query = query or {}
        query['required_variables'] = [
            {"type": "dataframe_columns", "names": [col]}
            for col in loaded_data
            if Utils.is_column_able_to_query(loaded_data[col])
        ]

    es_results = augmenter.query_by_json(query, loaded_data)
    if es_results:
        return [Dataset(es_result, original_data=loaded_data, query_json=query)
                for es_result in es_results]
    return []