def update_cache_profiles(self, unified_name_to_profiles: dict):
    """
    Given a unified_profile_name to profiles map, merges the profiles and creates the FK references
    :param unified_name_to_profiles:
    :return:
    """
    sql = self._update_cache_profiles[0]

    # Fire the sql script in a transaction
    with self._engine.begin() as conn:
        log.debug("Updating cache profiles")
        for _, profile_list in unified_name_to_profiles.items():
            # Flatten the profile list down to one profile: prefer the
            # profile with the longest display name
            reference_profile = None
            """:type : Profile"""
            for profile in profile_list:
                if reference_profile is None or len(profile.display_name) > len(reference_profile.display_name):
                    reference_profile = profile

            # If we found at least one reference_profile (which we should),
            # insert it into the cache
            if reference_profile is not None:
                u, r = unify_profile_name(reference_profile.first_name, reference_profile.last_name)
                b64u = generate_id(u)
                log.info("Inserting %s, %s" % (b64u, sanitize_text(r)))
                conn.execute(sql, (b64u, sanitize_text(r)))
    log.info("Cache profiles have been updated")
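# update_cache_profiles() and the other cache-update methods rely on helpers
# defined elsewhere in the project (unify_profile_name, unify_document_title,
# generate_id, sanitize_text). Their real implementations are not shown here;
# the following is only a minimal sketch of the contract the callers assume:
# a (unified, real) name tuple and a deterministic URL-safe id ("b64u").
import base64
import hashlib
import re


def unify_profile_name_sketch(first_name: str, last_name: str) -> tuple:
    # Hypothetical: normalize case/whitespace for matching,
    # keep the readable form for display
    real = "%s %s" % (first_name.strip(), last_name.strip())
    unified = re.sub(r"\s+", "", real).lower()
    return unified, real


def generate_id_sketch(unified: str) -> str:
    # Hypothetical: hash the unified name and encode it URL-safely
    digest = hashlib.md5(unified.encode("utf-8")).digest()
    return base64.urlsafe_b64encode(digest).decode("ascii")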
def update_cache_documents(self, unified_document_title_to_documents: dict):
    """
    Given a unified_document_title to documents map, merges the documents and creates the FK references
    :param unified_document_title_to_documents:
    :return:
    """
    sql = self._update_cache_documents[0]

    # Fire the sql script in a transaction
    with self._engine.begin() as conn:
        log.debug("Updating cache documents")
        for _, doc_list in unified_document_title_to_documents.items():
            # Flatten the document list down to one document: prefer the
            # most recently modified document
            reference_doc = None
            """:type : Document"""
            for doc in doc_list:
                if reference_doc is None or doc.core_last_modified > reference_doc.core_last_modified:
                    reference_doc = doc

            # If we found at least one reference_doc (which we should),
            # insert it into the cache
            if reference_doc is not None:
                u, r = unify_document_title(reference_doc.core_title)
                b64u = generate_id(u)
                conn.execute(sql, (b64u, sanitize_text(r)))
    log.info("Cache documents have been updated")
def is_trusted_proxy(addr: str) -> bool:
    if addr is None:
        return False
    log.debug("Checking if address '%s' is a trusted proxy" % addr)
    for trusted_proxy in trusted_proxies:
        if trusted_proxy.match(addr):
            return True
    return False
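# is_trusted_proxy() calls .match() on each entry, so trusted_proxies is
# presumably a module-level list of compiled regular expressions. A
# hypothetical configuration (the actual values depend on the deployment):
import re

trusted_proxies = [
    re.compile(r"^127\.0\.0\.1$"),   # local reverse proxy, e.g. NGINX
    re.compile(r"^10\.0\.0\.\d+$"),  # example internal address range
]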
def crawl_group_members(self):
    """
    Fetches members of the pre-configured research group
    :return:
    """
    self._members = self._crawler.get_group_members(self._research_group)
    log.debug("{num} group members have been fetched for group_id {group_id}".format(
        num=len(self._members),
        group_id=self._research_group
    ))
    log.info("Group members have been fetched")
def crawl_group_documents(self):
    """
    Fetches the publications that are associated with the pre-configured group
    :return:
    """
    self._group_documents = self._crawler.get_documents_by_group_id(self._research_group)
    log.debug("{num} documents have been fetched for group_id {group_id}".format(
        num=len(self._group_documents),
        group_id=self._research_group
    ))
    log.info("Group documents have been fetched")
def process_profile_documents(self):
    """
    Iterates over the profile documents, finds research fields, finds duplicates, finds author profiles
    :return:
    """
    for profile_unified in self._unified_name_to_profiles:
        found_docs = []
        profiles = self._unified_name_to_profiles[profile_unified]
        if len(profiles) == 0:
            log.warning("There were no profiles for the unified name %s" % profile_unified)
            continue

        # For each profile linked to that unified name, add the found documents to the list
        for profile in profiles:
            x = self._profile_docs[profile.identifier]
            log.debug("Used {len_x} documents from id {mendeley_id} for unified name {name}".format(
                len_x=len(x),
                mendeley_id=profile.identifier,
                name=unify_profile_name(profile.first_name, profile.last_name)
            ))
            found_docs += x

        # Process these documents
        for doc in found_docs:
            # Add doc to all docs
            self._documents.append(doc)

            # Create unified document title
            doc_unified, doc_real = unify_document_title(doc.core_title)

            # Add document to docs
            if doc_unified in self._unified_document_title_to_documents:
                existing_docs = self._unified_document_title_to_documents[doc_unified]
                existing_docs.append(doc)
            else:
                self._unified_document_title_to_documents[doc_unified] = [doc]

            # Append the doc title to the authored_docs of that unified profile name
            authored_docs = self._unified_name_to_authored_documents[profile_unified]
            authored_docs.add(doc_unified)

            # Process the core_authors field of the doc to find participants
            for author in doc.core_authors:
                self.analyze_author(doc_unified, author)

            # Analyze the tags field of the doc to find research fields
            for tag in doc.tags:
                self.analyze_field_tag(doc_unified, tag)
    log.info("Profile documents have been analyzed")
def update_cache_fields(self, unified_field_title_to_field: dict):
    """
    Given a unified_field_title to field map, updates the fields
    :param unified_field_title_to_field:
    :return:
    """
    sql = self._update_cache_fields[0]

    # Fire the sql script in a transaction
    with self._engine.begin() as conn:
        log.debug("Updating cache fields")
        for _, field in unified_field_title_to_field.items():
            b64u = generate_id(field.unified_title)
            conn.execute(sql, (b64u, sanitize_text(field.title)))
    log.info("Cache fields have been updated")
def get_remote_ip():
    """
    Extracts the remote address from the current flask request,
    walking the proxy chain from the outside in
    :return:
    """
    # Enter your trusted proxy here.
    # With a local NGINX reverse proxy that's localhost.
    # Be aware of this issue:
    # http://stackoverflow.com/questions/22868900/how-do-i-safely-get-the-users-real-ip-address-in-flask-using-mod-wsgi
    # Otherwise spoofing becomes dangerous
    route = request.access_route + [request.remote_addr]
    log.debug("Route: %s" % route)
    remote_addr = next(
        (addr for addr in reversed(route) if not is_trusted_proxy(addr)),
        request.remote_addr)
    log.debug("Choosing: '%s'" % remote_addr)
    return remote_addr
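# A worked example of the scan above, with invented addresses: suppose the
# request passed through one internal proxy and the local NGINX instance.
#
#   access_route = ["203.0.113.7", "10.0.0.5"]   # client, internal proxy
#   remote_addr  = "127.0.0.1"                   # local NGINX
#   route        = ["203.0.113.7", "10.0.0.5", "127.0.0.1"]
#
# Walking the route right-to-left skips "127.0.0.1" and "10.0.0.5" (both
# trusted) and returns "203.0.113.7", the first hop not added by our own
# infrastructure. If every hop is trusted, the fallback is
# request.remote_addr.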
def link_fields_to_documents(self, unified_field_title_to_documents: dict):
    """
    Given a unified_field_title to documents map, creates the N:M relations in the database
    :param unified_field_title_to_documents:
    :return:
    """
    # Get the different statements in the sql file
    delete = self._link_fields_to_documents[0]
    insert = self._link_fields_to_documents[1]

    # Fire the sql scripts in a transaction
    with self._engine.begin() as conn:
        log.debug("Deleting previous field -> document links")
        conn.execute(delete)

        log.debug("Inserting new field -> document links")
        for unified_field_title, doc_list in unified_field_title_to_documents.items():
            for doc_unified in doc_list:
                conn.execute(insert, (generate_id(doc_unified), generate_id(unified_field_title)))
    log.info("Field -> document links have been updated")
def crawl_profiles(self):
    """
    Given a populated members array this function crawls the profiles
    linked to the ids as well as the publications
    :return:
    """
    log.debug("Adding members to worker queues")
    for member in self._members:
        self._profile_queue.put(member.profile_id)
        self._profile_documents_queue.put(member.profile_id)

    # Create profile crawlers
    log.debug("Spawning profile workers")
    for i in range(number_profile_workers):
        t = Thread(target=self.profile_worker)
        t.daemon = False
        t.start()

    # Create document crawlers
    log.debug("Spawning document crawlers")
    for i in range(number_document_workers):
        t = Thread(target=self.document_worker)
        t.daemon = False
        t.start()

    # Wait for both queues to complete
    self._profile_queue.join()
    self._profile_documents_queue.join()
    log.info("Profiles and associated documents have been fetched")
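# Standalone sketch of the fill/spawn/join pattern used by crawl_profiles().
# join() only returns once every put() item has been matched by a
# task_done() call, which is why the workers below call task_done() on
# both the success and the failure path.
from queue import Queue
from threading import Thread


def _demo_crawl_pattern():
    q = Queue()
    for item in ("id-1", "id-2", "id-3"):  # invented ids
        q.put(item)

    def worker():
        while not q.empty():
            item = q.get()
            try:
                pass  # fetch/process the item here
            finally:
                q.task_done()  # exactly once per get()

    for _ in range(2):
        t = Thread(target=worker)
        t.daemon = False
        t.start()

    q.join()  # blocks until all items are processed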
def link_profiles_to_documents(
        self,
        unified_name_to_profiles: dict,
        unified_name_to_authored_documents: dict,
        unified_name_to_participated_documents: dict,
):
    """
    Given a unified_profile_name to authored_documents and participated_documents map(s),
    creates the N:M relations in the database
    :param unified_name_to_profiles:
    :param unified_name_to_authored_documents:
    :param unified_name_to_participated_documents:
    :return:
    """
    # Get the different statements in the sql file
    delete = self._link_profiles_to_documents[0]
    insert = self._link_profiles_to_documents[1]

    # Fire the sql scripts in a transaction
    with self._engine.begin() as conn:
        log.debug("Deleting previous profile -> document links")
        conn.execute(delete)

        log.debug("Inserting new profile -> document links")
        for unified_name, doc_list in unified_name_to_authored_documents.items():
            # TODO: if author unknown, ignore for now (Foreign key constraints broken otherwise)
            if unified_name not in unified_name_to_profiles:
                continue
            for doc_unified in doc_list:
                conn.execute(insert, (generate_id(unified_name), generate_id(doc_unified)))

        for unified_name, doc_list in unified_name_to_participated_documents.items():
            # TODO: if author unknown, ignore for now (Foreign key constraints broken otherwise)
            if unified_name not in unified_name_to_profiles:
                continue
            for doc_unified in doc_list:
                conn.execute(insert, (generate_id(unified_name), generate_id(doc_unified)))
    log.info("Profile -> document links have been updated")
def profile_worker(self):
    """
    Given a prefilled profile queue this worker will pop an id
    and fetch the associated profile
    :return:
    """
    while not self._profile_queue.empty():
        profile_id = self._profile_queue.get()
        try:
            # Fetch the profile
            profile = self._crawler.get_profile_by_id(profile_id)
            self._profiles.append(profile)
            log.debug("The profile for profile_id {profile_id} has been fetched".format(
                profile_id=profile_id
            ))
            # Mark task as done
            self._profile_queue.task_done()
        except Exception as e:
            log.warning("Failed to fetch the profile for profile_id {profile_id}: {e}".format(
                profile_id=profile_id, e=e
            ))
            self._profile_queue.task_done()
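# Possible hardening, not part of the original workers: checking q.empty()
# and then calling the blocking q.get() can race when several workers drain
# the same queue (another worker may take the last item in between).
# get_nowait() makes the check and the fetch atomic:
from queue import Empty, Queue


def drain(q: Queue, handle):
    while True:
        try:
            item = q.get_nowait()
        except Empty:
            return
        try:
            handle(item)
        finally:
            q.task_done()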
def document_worker(self):
    """
    Given a prefilled profile_documents queue this worker will pop an id
    and fetch the associated documents
    :return:
    """
    while not self._profile_documents_queue.empty():
        profile_id = self._profile_documents_queue.get()
        try:
            # Fetch the documents
            documents = self._crawler.get_documents_by_profile_id(profile_id)
            self._profile_documents[profile_id] = documents
            log.debug("{num} documents have been fetched for profile_id {profile_id}".format(
                num=len(documents),
                profile_id=profile_id
            ))
            # Mark task as done
            self._profile_documents_queue.task_done()
        except Exception as e:
            log.warning("Failed to fetch documents for profile_id {profile_id}: {e}".format(
                profile_id=profile_id, e=e
            ))
            self._profile_documents_queue.task_done()
def get_profiles(self):
    log.info('The route GET /profiles/ has been triggered')

    # Default parameters
    profile_ids = ''
    field_ids = ''
    slim = False

    # Set passed query parameters if existing
    if 'profile-ids' in request.args:
        profile_ids = request.args['profile-ids'].split(',')
        log.debug('Query parameter "profile-ids" = %s' % profile_ids)
    if 'field-ids' in request.args:
        field_ids = request.args['field-ids'].split(',')
        log.debug('Query parameter "field-ids" = %s' % field_ids)
    if 'slim' in request.args:
        # bool() on a non-empty string is always True, so parse the value
        # explicitly (e.g. "?slim=false" must not enable slim mode)
        slim = request.args['slim'].lower() in ('1', 'true', 'yes')
        log.debug('Query parameter "slim" = %s' % slim)

    # Trigger the respective methods
    if slim:
        profiles = self._data_controller.api_data.get_profiles_slim()
    else:
        profiles = self._data_controller.api_data.get_profiles_by_profile_ids_or_field_ids(
            profile_ids=profile_ids,
            field_ids=field_ids
        )

    # Pattern for cms pages
    page_pattern = self._cache_config.profile_page_pattern

    # Serialize profiles
    response = []
    for profile in profiles:
        profile_dict = dict(profile)

        # Get names
        first_name = None
        last_name = None
        if 'first_name' in profile_dict and 'last_name' in profile_dict:
            first_name = profile_dict['first_name']
            last_name = profile_dict['last_name']
        elif 'name' in profile_dict:
            name_parts = [s.lower() for s in profile_dict['name'].split()]
            if len(name_parts) >= 2:
                first_name = name_parts[0]
                last_name = name_parts[1]

        # If the names are available create the page link
        if first_name is not None and last_name is not None:
            page = page_pattern
            page = re.sub(':firstname', first_name, page)
            page = re.sub(':lastname', last_name, page)
            profile_dict["page"] = page

        response.append(profile_dict)
    return json.dumps(response, cls=DefaultEncoder)
def __init__(self, profile_page_pattern: str):
    self._profile_page_pattern = profile_page_pattern
    log.debug("Using profile_page_pattern: %s" % profile_page_pattern)
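# The get_profiles() route substitutes the :firstname and :lastname
# placeholders into this pattern. With a hypothetical pattern (the real one
# comes from the cache configuration):
#
#   pattern = "https://example.org/people/:firstname-:lastname"
#   re.sub(':firstname', 'jane', pattern) then re.sub(':lastname', 'doe', ...)
#   -> "https://example.org/people/jane-doe"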
def get_documents_by_profile_ids_and_field_ids(self,
                                               profile_ids: [int],
                                               field_ids: [int],
                                               order_attr: str="year",
                                               order_dir: str="desc",
                                               limit: int=0,
                                               offset: int=0,
                                               only_count: bool=False):
    """
    Given profile ids and field ids, queries all documents that belong
    to the research field AND are associated with these profiles
    :return:
    """
    query_limit = 20
    query_offset = 0
    query_order_attr = "pub_year"
    query_order_dir = "ASC"

    # Build the quoted IN-lists (empty lists become (NULL))
    if len(profile_ids) > 0:
        profile_ids_string = "(%s)" % (",".join(map(lambda x: "'%s'" % x, profile_ids)))
    else:
        profile_ids_string = "(NULL)"
    if len(field_ids) > 0:
        field_ids_string = "(%s)" % (",".join(map(lambda x: "'%s'" % x, field_ids)))
    else:
        field_ids_string = "(NULL)"

    # Check order attribute parameter
    if order_attr == "year":
        query_order_attr = "d.pub_year"
    elif order_attr == "title":
        query_order_attr = "d.title"
    elif order_attr == "source":
        query_order_attr = "d.source"

    # Check order direction
    if order_dir == "desc":
        query_order_dir = "DESC"
    elif order_dir == "asc":
        query_order_dir = "ASC"

    # Check limit parameter
    if limit > 0:
        query_limit = limit

    # Check offset parameter
    if offset > 0:
        query_offset = offset

    # If no profile_ids and field_ids have been passed, return everything
    # and use the query without "AND xx IN ()"
    if len(profile_ids) > 0 and len(field_ids) > 0:
        query = self._query_documents_by_profile_ids_and_field_ids[0]
        query = re.sub(':profile_ids', profile_ids_string, query)
        query = re.sub(':field_ids', field_ids_string, query)
    elif len(profile_ids) > 0 and len(field_ids) == 0:
        query = self._query_documents_by_profile_ids[0]
        query = re.sub(':profile_ids', profile_ids_string, query)
    elif len(profile_ids) == 0 and len(field_ids) > 0:
        query = self._query_documents_by_field_ids[0]
        query = re.sub(':field_ids', field_ids_string, query)
    else:
        query = self._query_all_documents[0]

    if only_count:
        # query_head is assumed to be a module-level constant matching the
        # "SELECT :select_attributes FROM" head of the query templates
        select = "SELECT COUNT(DISTINCT cd.id) AS cnt FROM"
        query = re.sub(query_head, select, query)
        query = re.sub('ORDER BY :order_by', '', query)
        query = re.sub('LIMIT :query_limit', '', query)
    else:
        select = str(
            "DISTINCT "
            "cd.id AS id,"
            "d.mendeley_id AS mendeley_id,"
            "d.title AS title,"
            "d.doc_type AS doc_type,"
            "d.last_modified AS last_modified,"
            "d.abstract AS abstract,"
            "d.source AS source,"
            "d.pub_year AS pub_year,"
            "d.authors AS authors,"
            "d.keywords AS keywords,"
            "d.tags AS tags,"
            "d.derived_bibtex AS derived_bibtex")
        query = re.sub(':select_attributes', select, query)

        # Substitute order_by and query_limit as well
        query = re.sub(':order_by', '{order_attr} {order_dir}'.format(
            order_attr=query_order_attr,
            order_dir=query_order_dir
        ), query)
        query = re.sub(':query_limit', '{offset},{limit}'.format(
            offset=query_offset,
            limit=query_limit
        ), query)

    log.info("Querying documents by profile_ids and field_ids\n"
             "\t| profile_ids: {profile_ids}\n"
             "\t| field_ids: {field_ids}\n"
             "\t| order_attr: {order_attr}\n"
             "\t| order_dir: {order_dir}\n"
             "\t| offset: {offset}\n"
             "\t| limit: {limit}\n"
             "\t| only_count: {only_count}".format(
                 profile_ids=profile_ids_string,
                 field_ids=field_ids_string,
                 order_attr=query_order_attr,
                 order_dir=query_order_dir,
                 offset=query_offset,
                 limit=query_limit,
                 only_count=only_count))
    log.debug("Query: {query}".format(query=query))

    # Fire the sql script in a transaction
    with self._engine.begin() as conn:
        return conn.execute(query).fetchall()
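# The method above splices quoted ids directly into the SQL text and relies
# on upstream sanitization. Since the surrounding code uses a SQLAlchemy
# engine (engine.begin(), conn.execute()), one safer alternative is an
# expanding bind parameter for the IN-list. A sketch only; the table and
# column names are invented for illustration:
from sqlalchemy import bindparam, text


def fetch_documents_by_ids(conn, ids):
    stmt = text("SELECT id, title FROM cache_document WHERE id IN :ids")
    stmt = stmt.bindparams(bindparam("ids", expanding=True))
    return conn.execute(stmt, {"ids": ids}).fetchall()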
def update_profiles(self, profiles: [Profile]):
    """
    Given a profile list, this method replaces the profiles in the database with new ones
    :param profiles:
    :return:
    """
    def insert_profile(conn: Connection, insert: str, p: Profile):
        u, _ = unify_profile_name(p.first_name, p.last_name)
        b64u = generate_id(u)
        conn.execute(insert, (
            sanitize_text(p.identifier),
            b64u,
            sanitize_text(p.first_name),
            sanitize_text(p.last_name),
            sanitize_text(p.display_name),
            sanitize_text(p.link),
        ))

    # If there's nothing to insert, abort
    if len(profiles) == 0:
        return None

    delete = self._replace_profiles[0]
    insert = self._replace_profiles[1]
    temp = self._replace_profiles[2]
    temp_insert = self._replace_profiles[3]
    update = self._replace_profiles[4]
    temp_drop = self._replace_profiles[5]

    # Fire the sql script in a transaction
    with self._engine.begin() as conn:
        log.debug("Deleting existing profiles")
        conn.execute(delete)

        log.debug("Inserting new profiles")
        for profile in profiles:
            insert_profile(conn, insert, profile)

        log.debug("Creating temporary table")
        conn.execute(temp)

        log.debug("Spooling data into temporary table")
        conn.execute(temp_insert)

        log.debug("Creating profile links")
        conn.execute(update)

        log.debug("Dropping temporary table")
        conn.execute(temp_drop)
    log.info("Profiles have been updated")
def update_documents(self, docs: [Document]):
    """
    Given a document list, this method replaces the documents in the database with new ones
    :param docs:
    :return:
    """
    def insert_doc(conn: Connection, insert: str, doc: Document):
        u, _ = unify_document_title(doc.core_title)
        b64u = generate_id(u)

        # Create strings
        authors_string = ", ".join(map(lambda x: "{first} {last}".format(first=x[0], last=x[1]), doc.core_authors))
        keywords_string = ", ".join(doc.core_keywords)
        tags_string = ", ".join(doc.tags)

        # Create bibtex
        bibtex = generate_bibtex(doc)

        # Insert tuple
        conn.execute(insert, (
            sanitize_text(doc.core_id),
            b64u,
            sanitize_text(doc.core_profile_id),
            sanitize_text(doc.core_title),
            sanitize_text(doc.core_type),
            datetime_to_sqltime(doc.core_created),
            datetime_to_sqltime(doc.core_last_modified),
            sanitize_text(doc.core_abstract),
            sanitize_text(doc.core_source),
            doc.core_year,
            sanitize_text(authors_string),
            sanitize_text(keywords_string),
            sanitize_text(tags_string),
            sanitize_text(doc.doc_website),
            sanitize_text(doc.conf_website),
            doc.conf_month,
            sanitize_text(doc.conf_pages),
            sanitize_text(doc.conf_city),
            sanitize_text(bibtex),
        ))

    # If there's nothing to insert, abort
    if len(docs) == 0:
        return None

    delete = self._replace_documents[0]
    insert = self._replace_documents[1]
    temp = self._replace_documents[2]
    temp_insert = self._replace_documents[3]
    update = self._replace_documents[4]
    temp_drop = self._replace_documents[5]

    # Fire the sql script in a transaction
    with self._engine.begin() as conn:
        log.debug("Deleting existing documents")
        conn.execute(delete)

        log.debug("Inserting new documents")
        for doc in docs:
            insert_doc(conn, insert, doc)

        log.debug("Creating temporary table")
        conn.execute(temp)

        log.debug("Spooling data into temporary table")
        conn.execute(temp_insert)

        log.debug("Creating document links")
        conn.execute(update)

        log.debug("Dropping temporary table")
        conn.execute(temp_drop)
    log.info("Documents have been updated")
def get_documents(self):
    log.info('The route GET /documents/ has been triggered')

    # Default parameters
    profile_ids = ''
    field_ids = ''
    limit = 0
    offset = 0
    order_dir = ""
    order_attr = ""
    only_count = False

    # Set passed query parameters if existing
    if 'profile-ids' in request.args:
        profile_ids = request.args['profile-ids'].split(',')
        log.debug('Query parameter "profile-ids" = %s' % profile_ids)
    if 'field-ids' in request.args:
        field_ids = request.args['field-ids'].split(',')
        log.debug('Query parameter "field-ids" = %s' % field_ids)
    if 'limit' in request.args:
        limit = int(request.args['limit'])
        log.debug('Query parameter "limit" = %s' % limit)
    if 'offset' in request.args:
        offset = int(request.args['offset'])
        log.debug('Query parameter "offset" = %s' % offset)
    if 'order-dir' in request.args:
        order_dir = request.args['order-dir']
        log.debug('Query parameter "order-dir" = %s' % order_dir)
    if 'order-attr' in request.args:
        order_attr = request.args['order-attr']
        log.debug('Query parameter "order-attr" = %s' % order_attr)
    if 'only-count' in request.args:
        # bool() on a non-empty string is always True, so parse the value
        # explicitly (e.g. "?only-count=false" must not trigger counting)
        only_count = request.args['only-count'].lower() in ('1', 'true', 'yes')
        log.debug('Query parameter "only-count" = %s' % only_count)

    # Trigger the respective methods
    data = self._data_controller.api_data.get_documents_by_profile_ids_and_field_ids(
        profile_ids=profile_ids,
        field_ids=field_ids,
        order_attr=order_attr,
        order_dir=order_dir,
        offset=offset,
        limit=limit,
        only_count=only_count
    )

    # Serialize the rows (for only_count this is a single {"cnt": n} row)
    response = [dict(document.items()) for document in data]

    if only_count:
        if len(response) > 0:
            return json.dumps(response[0], cls=DefaultEncoder)
        return json.dumps({"cnt": 0}, cls=DefaultEncoder)
    return json.dumps(response, cls=DefaultEncoder)
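# Hypothetical client call against the route above (host, port and ids are
# placeholders); the parameter names mirror what get_documents() parses
# from request.args:
import requests

resp = requests.get(
    "http://localhost:5000/documents/",
    params={
        "profile-ids": "1,2",
        "field-ids": "3",
        "order-attr": "year",
        "order-dir": "desc",
        "limit": "10",
        "offset": "0",
    },
)
print(resp.json())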