Exemple #1
0
 def execute_server_listener(self):
     """
     Accept incoming connections forever and hand each one to ``Server``.

     Every accepted connection is logged and then passed, together with
     its address and ``self.password_hash``, to ``start_new_thread``.
     This method never returns on its own.
     """
     print('Waiting for connections')
     while True:
         client_socket, client_address = self.server.accept()
         server_log('Connection %s established to %s'
                    % (client_socket, client_address))
         # TODO: open thread for new server loop (possibly new object)
         server_args = (client_socket, client_address, self.password_hash)
         start_new_thread(Server, server_args)
 def validate_relative_path(self, relative_path):
     """
     Validate a requested path and reduce it to a path relative to the
     root folder.

     Resolution rules:
       * empty path: the root folder itself (``''``);
       * leading ``/``: appended to the root folder;
       * leading ``~`` while ``self.abs_root_folder`` is truthy: expanded
         against the user's home directory;
       * anything else: joined onto ``self.current_path`` inside the root.

     The resolved path must stay inside the root folder, exist, and be a
     directory.

     :param relative_path: path string to validate (may be empty)
     :return: ``(error, simplified_path)``; ``error`` is ``''`` on success,
         and ``simplified_path`` is ``''`` whenever an error is returned
     """
     if not relative_path:
         return '', ''

     root = self.root_folder_abs_directory
     if relative_path.startswith('/'):
         complete_path = root + relative_path
     elif relative_path.startswith('~') and self.abs_root_folder:
         complete_path = path.expanduser('~') + relative_path[1:]
     else:
         complete_path = path.join(
             root, path.join(self.current_path, relative_path))
     complete_path = path.normpath(path.realpath(complete_path))
     server_log("Trying to access: %s" % complete_path)

     # A bare startswith(root) would also accept sibling directories such
     # as "<root>_other". Comparing with a trailing separator on both sides
     # only accepts the root itself or paths strictly below it.
     root_prefix = root if root.endswith(path.sep) else root + path.sep
     if not (complete_path + path.sep).startswith(root_prefix):
         return 'Cannot go outside root folder', ''
     if not path.exists(complete_path):
         return 'Directory not found', ''
     if not path.isdir(complete_path):
         return ERROR_NOT_A_DIRECTORY, ''

     current_path = path.relpath(complete_path, root)
     # relpath() reports the root itself as '.', which callers see as ''.
     return '', ('' if current_path == '.' else current_path)
Exemple #3
0
def text_to_vector(text):
    """
    Turn plain text into a single query vector using the pre-loaded
    `word_model_wv`.

    A token contributes when its lemma is present in `word_model_wv` and
    is not in `stop_words`; the vectors of all contributing lemmas are
    averaged. When nothing qualifies, the request is aborted with a 400.

    Args:
      * text: plain text string

    Returns:
      The mean vector with a leading axis of length 1 added.
    """

    lemmas = [
        tok.lemma_
        for tok in nlp(text)
        if tok.lemma_ in word_model_wv and tok.lemma_ not in stop_words
    ]

    # Nothing usable in the input: report it and abort the request.
    if not lemmas:
        message = "Valid tokens not found in user input"
        server_log(f"{message}\n")
        abort(400, message=message)

    # Sum the lemma vectors one by one, then divide to get the mean; the
    # extra leading axis gives the shape the KNN search function accepts.
    total = np.zeros(word_model_wv.vector_size)
    for lemma in lemmas:
        total += word_model_wv[lemma]

    return (total / len(lemmas))[np.newaxis, :]
Exemple #4
0
 def bind(self, host, port):
     """
     Create an IPv4 stream socket, keep it on ``self.server_socket`` and
     bind it to ``(host, port)``.

     A ``socket.error`` raised along the way is logged and the process is
     terminated through ``sys.exit()``.
     """
     try:
         new_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         self.server_socket = new_socket
         utils.server_log("Binding socket to port: " + str(port))
         endpoint = (host, port)
         self.server_socket.bind(endpoint)
     except socket.error as bind_error:
         utils.server_log("Socket error: " + str(bind_error))
         sys.exit()
Exemple #5
0
 def cd_command(self, args_list):
     """
     Handle a change-directory request.

     The first element of ``args_list`` is resolved through
     ``self.file_manager.resolve_path``. On success the reply is whatever
     ``self.pwd_command([])`` produces; otherwise an ``INVALID``-prefixed
     error message is sent over ``self.connection``.

     :param args_list: command arguments; the first one is the target path
     """
     if not args_list:
         # sendall() needs bytes on Python 3, so encode this reply the
         # same way as the error reply below.
         self.connection.sendall(('%sNo path sent' % INVALID).encode())
         return

     target = args_list[0]
     error = self.file_manager.resolve_path(target)
     if error:
         server_log('Access denied')
         self.connection.sendall(('%s%s' % (INVALID, error)).encode())
     else:
         self.pwd_command([])
Exemple #6
0
def test_paper_knn(data_row):
    """Check that the nearest paper to a data row's vector is that row's own paper."""

    expected_pmc, row_vector = get_np_vec(data_row)
    neighbors = get_paper_knn(row_vector)
    nearest = neighbors[0]
    nearest_id = nearest['pmcid']
    nearest_distance = nearest['distance']

    server_log(
        f"Closest neighbor: {nearest_id}; distance: {nearest_distance}"
    )

    assert nearest_id == expected_pmc
    assert nearest_distance < 1e-2  # the match must be (nearly) exact
    server_log(f"{expected_pmc} confirmed\n")
Exemple #7
0
def get_doi_neighbors(user_doi):
    """
    Look up the nearest papers and journals for the paper behind a DOI.

    The document content is fetched with `get_doi_content`, converted to
    a query vector with `parse_content`, and then searched.

    Arguments:
      * user_doi: biorxiv DOI

    Returns:
      dict with keys "paper_neighbors", "journal_neighbors",
      "coordinates", "paper_info" and "xml_found".
    """

    server_log(f"Received user DOI ({user_doi})")

    content, paper_metadata, xml_found = get_doi_content(user_doi)
    if xml_found:
        file_type = 'XML'
    else:
        file_type = 'PDF'
    server_log(f"Downloaded {file_type} content of {user_doi}")

    query_vec = parse_content(content, is_xml=xml_found)
    server_log(f"Start searching {user_doi}")

    # The three searches run in this order, before the "Finished" log line.
    result = {
        "paper_neighbors": get_paper_knn(query_vec),
        "journal_neighbors": get_journal_knn(query_vec),
        "coordinates": get_coordinates(query_vec),
    }
    server_log(f"Finished searching {user_doi}\n")

    result["paper_info"] = paper_metadata
    result["xml_found"] = xml_found
    return result
Exemple #8
0
 def server_loop(self):
     """
     Receive, decode and dispatch messages until the connection is lost.

     The state starts at ``USER_AUTH``. Each decoded message is stored on
     ``self.data_received`` and handled by the callable registered in
     ``self.function_switcher`` for the current state. A falsy decoded
     message is treated as a lost connection and ends the loop.
     """
     print('Waiting for user authentication')
     self.state = USER_AUTH
     while True:
         server_log('Awaiting message')
         raw_message = self.get_byte_list()
         server_log('Decoding received message')
         self.data_received = self.decode(raw_message)
         if self.data_received:
             server_log('Executing message')
             handler = self.function_switcher[self.state]
             handler()
             server_log('Concluded')
             continue
         server_log('Connection lost')
         return
Exemple #9
0
def get_neighbors(user_doi):
    """
    Look up the nearest papers and journals for the paper behind a DOI.

    Arguments:
        - user_doi: biorxiv DOI

    Returns:
        dict with keys "paper_neighbors", "journal_neighbors",
        "coordinates" and "paper_info".
    """

    server_log(f"Received user DOI ({user_doi})")

    content, paper_metadata = get_doi_content(user_doi)
    server_log(f"Downloaded PDF content of {user_doi}")

    query_vec = parse_content(content)
    server_log(f"Start searching {user_doi}")

    # The three searches run in this order, before the "Finished" log line.
    result = {
        "paper_neighbors": get_paper_knn(query_vec),
        "journal_neighbors": get_journal_knn(query_vec),
        "coordinates": get_coordinates(query_vec),
    }
    server_log(f"Finished searching {user_doi}\n")

    result["paper_info"] = paper_metadata
    return result
Exemple #10
0
 def listen(self, max_connections):
     """
     Listen on ``self.server_socket`` and serve each client on its own
     thread.

     For every accepted connection the counter is incremented, the peer
     address is logged, a 60 second timeout is set on the client socket,
     and ``self.handle_client`` is started in a named thread. A
     ``socket.error`` is logged and ends the process via ``sys.exit()``.

     :param max_connections: value passed to ``socket.listen``
     """
     try:
         self.server_socket.listen(max_connections)
         while True:
             conn, peer = self.server_socket.accept()
             self.connection_counter += 1
             peer_label = peer[0] + ":" + str(peer[1])
             utils.server_log("Connection has been established [" +
                              peer_label + "]")
             conn.settimeout(60)
             thread_name = "Connection Thread " + str(self.connection_counter)
             worker = threading.Thread(target=self.handle_client,
                                       name=thread_name,
                                       args=(conn, peer))
             worker.start()
     except socket.error as listen_error:
         utils.server_log("Socket error: " + str(listen_error))
         sys.exit()
def ping_biorxiv_or_medrxiv(doi, server="biorxiv"):
    """
    Ask the bioRxiv "details" API whether a DOI exists in a repository.

    Args:
        doi - a doi that grabs the most current version of a preprint
        server - repository segment of the API URL ("biorxiv" by default)

    Returns:
        The decoded JSON response, or None when its "collection" entry
        is empty. Connection failures, non-200 responses and undecodable
        bodies are logged and reported through abort().
    """
    api_url = f"https://api.biorxiv.org/details/{server}/{doi}"

    # Step 1: reach the API.
    try:
        response = requests.get(api_url)
    except Exception as e:
        message = f"Cannot connect to {api_url}"
        server_log(f"{message}: {e}\n")
        abort(404, message=message)

    # Step 2: anything other than HTTP 200 is passed on to the caller.
    status = response.status_code
    if status != 200:
        message = f"Invalid response from {api_url}"
        server_log(f"{message}\n")
        abort(status, message=message)

    # Step 3: decode the body.
    try:
        content = response.json()
    except Exception as e:
        message = f"Cannot convert response from {api_url} to json format"
        server_log(f"{message}: {e}\n")
        abort(404, message=message)

    has_records = len(content["collection"]) >= 1
    return content if has_records else None
Exemple #12
0
 def write_file(self):
     """
     Write ``self.data_received`` to ``self.item_name`` through the file
     manager, log the outcome and switch the state back to ``READING``.

     A truthy value returned by the file manager is treated as an error
     message.
     """
     server_log('writing file: %s' % self.item_name)
     error = self.file_manager.write_file(
         self.simplified_abs_path, self.item_name, self.data_received)
     outcome = ('File write failed: ' + error) if error \
         else 'File write successfully'
     server_log(outcome)
     self.state = READING
def get_doi_content(user_doi):
    """
    Download the latest PDF of a preprint and collect its metadata.

    The DOI is looked up on biorxiv first and on medrxiv second; when
    neither knows it, the request is aborted with a 404.

    Args:
        user_doi - a biorxiv doi that grabs the most current version of a preprint

    Returns:
        (raw response body, paper metadata dict)
    """

    # Repositories in lookup order, each with the base URL of its content.
    repositories = (
        ("biorxiv", "http://biorxiv.org/content"),
        ("medrxiv", "http://medrxiv.org/content"),
    )
    for server_name, pdf_url in repositories:
        content = ping_biorxiv_or_medrxiv(user_doi, server=server_name)
        if content is not None:
            break
    else:
        # No repository matched: raise the red flag.
        message = f"Cannot find document {user_doi} in either biorxiv or medrxiv."
        server_log(f"{message}\n")
        abort(404, message=message)

    # The last entry of the collection is taken as the latest version.
    latest_paper = content['collection'][-1]

    # Output key -> key in the API record.
    field_map = (
        ("title", 'title'),
        ("authors", 'authors'),
        ("doi", 'doi'),
        ("accepted_date", 'date'),
    )
    paper_metadata = {
        out_key: latest_paper[api_key] for out_key, api_key in field_map
    }
    paper_metadata["publisher"] = "Cold Spring Harbor Laboratory"

    # Build the URL of that version's PDF and fetch it.
    pdf_url = f"{pdf_url}/{user_doi}v{latest_paper['version']}.full.pdf"
    try:
        response = requests.get(pdf_url)
    except Exception as e:
        message = f"Cannot connect to {pdf_url}"
        server_log(f"{message}: {e}\n")
        abort(404, message=message)

    status = response.status_code
    if status != 200:
        message = f"Invalid response from {pdf_url}"
        server_log(f"{message}\n")
        abort(status, message=message)

    return response.content, paper_metadata
Exemple #14
0
def get_text_neighbors(user_text):
    """
    Look up the nearest papers and journals for a piece of plain text.

    Arguments:
      * user_text: user's input plain text

    Returns:
      dict with keys "paper_neighbors", "journal_neighbors" and
      "coordinates".
    """

    server_log("Received user's plain text")
    query_vec = text_to_vector(user_text)

    server_log(f"Start plain text searching")

    # The three searches run in this order, before the "Finished" log line.
    result = {
        "paper_neighbors": get_paper_knn(query_vec),
        "journal_neighbors": get_journal_knn(query_vec),
        "coordinates": get_coordinates(query_vec),
    }

    server_log(f"Finished plain text searching\n")
    return result
Exemple #15
0
def main():
    """Log the start-up message, then create and start the RPC servers."""
    utils.server_log("Snowflake started", constants.time_datestring)
    RPCServers(6994, 6993).start()
def get_doi_content(user_doi):
    """
    Download a preprint's source (XML when available, otherwise PDF) and
    collect its metadata.

    The DOI is looked up on biorxiv first and on medrxiv second; when
    neither knows it, the request is aborted with a 404.

    Args:
        user_doi - a biorxiv doi that grabs the most current version of a preprint

    Returns:
        (raw response body, paper metadata dict, xml_found flag)
    """

    # Repositories in lookup order, each with the base URL of its content.
    repositories = (
        ("biorxiv", "http://biorxiv.org/content"),
        ("medrxiv", "http://medrxiv.org/content"),
    )
    for server_name, doc_url in repositories:
        content = ping_biorxiv_or_medrxiv(user_doi, server=server_name)
        if content is not None:
            break
    else:
        # No repository matched: raise the red flag.
        message = f"Cannot find document {user_doi} in either biorxiv or medrxiv."
        server_log(f"{message}\n")
        abort(404, message=message)

    # The last entry of the collection is taken as the latest version.
    latest_paper = content["collection"][-1]

    # Output key -> key in the API record.
    field_map = (
        ("title", "title"),
        ("authors", "authors"),
        ("doi", "doi"),
        ("accepted_date", "date"),
    )
    paper_metadata = {
        out_key: latest_paper[api_key] for out_key, api_key in field_map
    }
    paper_metadata["publisher"] = "Cold Spring Harbor Laboratory"

    # First choice: the XML source, addressed by date (with "/" separators)
    # and the stem of the DOI.
    # NOTE(review): Path.stem drops everything after the last dot of the
    # final DOI segment - confirm that is intended for dotted DOI suffixes.
    date_path = latest_paper["date"].replace("-", "/")
    doi_stem = Path(user_doi).stem
    file_url = f"{doc_url}/early/{date_path}/{doi_stem}.source.xml"

    xml_found = False
    try:
        response = requests.get(file_url)
        xml_found = response.status_code == 200
    except Exception as e:
        # A failed XML request is only logged; the PDF is tried next.
        message = f"Cannot connect to {file_url}"
        server_log(f"{message}: {e}\n")

    # Second choice: the PDF of the latest version.
    if not xml_found:
        file_url = f"{doc_url}/{user_doi}v{latest_paper['version']}.full.pdf"
        try:
            response = requests.get(file_url)
        except Exception as e:
            message = f"Cannot connect to {file_url}"
            server_log(f"{message}: {e}\n")
            abort(404, message=message)

    status = response.status_code
    if status != 200:
        message = f"Invalid response from {file_url}"
        server_log(f"{message}\n")
        abort(status, message=message)

    return response.content, paper_metadata, xml_found
Exemple #17
0
                            for pool in poolscol.find():
                                pools.append(pool)
                            pools_json = dumps(pools)
                            client.send(
                                protocol.response(
                                    len(pools_json.encode("utf-8"))).encode(
                                        "utf-8"))
                            client.send(pools_json.encode("utf-8"))
                client.close()
            except:
                client.close()


# Script entry point: check the environment, load the configuration and
# set up the pool server.
if __name__ == "__main__":
    # CGRU_LOCATION must be present in the environment: log its value, or
    # log that it is missing and exit.
    if "CGRU_LOCATION" in os.environ:
        utils.server_log("CGRU_LOCATION=" + os.environ['CGRU_LOCATION'])
    else:
        utils.server_log("CGRU_LOCATION is not set!")
        sys.exit()

    # Loads Mongo DB config
    mongodb_config = utils.get_mongodb_config()

    # Loads pool server config
    Config.check()
    Config.load()

    # Pool server setup: pass the MongoDB host and port on to the server
    # (the port is stored as a string).
    poolServer = PoolServer()
    poolServer.mongodb_host = mongodb_config["host"]
    poolServer.mongodb_port = str(mongodb_config["port"])