def test_top_author_format(sample_data, zero_division_data) -> None:
    """
    Verify the shape of the value returned by top_authors for each
    fixture dataset: the result itself must be a tuple, every entry of
    its first element must be a tuple and every entry of its second
    element must be a float.

    :param sample_data: fixture whose "documents" entry is fit first
    :param zero_division_data: fixture whose "documents" entry is fit second
    :return: None
    """

    # collect the document sets up front so both fixtures are read
    # before any fitting starts
    document_sets = [
        fixture["documents"] for fixture in (sample_data, zero_division_data)
    ]

    for documents in document_sets:
        # a fresh graph per dataset keeps the two runs independent
        graph = ar.Graph()
        graph.fit(documents=documents)

        ranking = graph.top_authors()

        # exact type check (not isinstance): subclasses are rejected
        assert type(ranking) is tuple

        # pair up the entries of the first and second elements
        authors, scores = ranking[0], ranking[1]
        for author, score in zip(authors, scores):
            assert type(author) is tuple
            assert type(score) is float
def test_speed(mls_data) -> None:
    """
    While AuthorRank is not intended to be quick, this benchmark bounds
    the time taken to fit the graph so that future changes to the code
    base do not exceed the threshold. The limit could later be tightened
    to test speed improvements, or the test extended to compare normal
    and parallel processing modes.

    :param mls_data: fixture holding the documents to fit; authors are
        read from the "author" key using the "given" and "family" fields
    :return: None
    """

    # upper bound, in seconds, on the time allowed for fitting
    max_seconds = 320.

    # perf_counter is monotonic, so the measured interval cannot be
    # skewed by system clock adjustments the way time.time() can
    started = time.perf_counter()

    # fit the graph; this is the only work being timed
    ar_graph = ar.Graph()
    ar_graph.fit(
        documents=mls_data,
        progress_bar=True,
        authorship_key="author",
        keys={"given", "family"},
    )

    elapsed = time.perf_counter() - started

    # assert the time is less than the allowed amount
    assert elapsed < max_seconds
def test_single_author() -> None:
    """
    Check that fitting a document set containing only one author emits
    a UserWarning carrying the expected message.

    :return: None
    """

    # the lone author appearing in the document set
    sole_author = {
        "first_name": "Valentino",
        "last_name": "Constantinou",
        "affiliation": {
            "name": "NASA Jet Propulsion Laboratory",
            "department": "Office of the Chief Information Officer"
        }
    }

    # a one-document, one-author dataset
    documents = [{
        "title": "PyNomaly: Anomaly detection using Local Outlier Probabilities (LoOP).",
        "authors": [sole_author]
    }]

    expected = (
        "Number of authors in document set must be greater than one. "
        "AuthorRank not fit to the data, please try again."
    )

    graph = ar.Graph()

    # fitting must raise at least one UserWarning
    with pytest.warns(UserWarning) as captured:
        graph.fit(documents=documents)

    # the expected text must be among the captured warning messages
    raised = [warning.message.args[0] for warning in captured]
    assert expected in raised
def test_normalization(sample_data) -> None:
    """
    Check that requesting normalized scores from top_authors yields
    values within [0, 1], with the first score equal to 1 and the last
    score equal to 0.

    :param sample_data: fixture whose "documents" entry is fit
    :return: None
    """

    # build the graph and fit it with the progress bar enabled
    graph = ar.Graph()
    graph.fit(documents=sample_data['documents'], progress_bar=True)

    # ask for the ranking with normalized scores
    ranking = graph.top_authors(normalize_scores=True)

    # exact type check (not isinstance): subclasses are rejected
    assert type(ranking) is tuple

    # the second element of the result holds the scores
    scores = ranking[1]

    # every score must lie in the closed unit interval
    assert all(0. <= score <= 1. for score in scores)

    # the final score must be exactly 0
    assert scores[-1] == 0.

    # the leading score must be exactly 1
    assert scores[0] == 1.0
def test_export_format(sample_data) -> None:
    """
    Check that a fitted graph exports as a dictionary via as_json.
    Only the type of the export is checked here.

    :param sample_data: fixture whose "documents" entry is fit
    :return: None
    """

    # build and fit the graph
    graph = ar.Graph()
    graph.fit(documents=sample_data["documents"])

    # the export must be exactly a dict (subclasses are rejected)
    exported = graph.as_json()
    assert type(exported) is dict
def test_no_fit() -> None:
    """
    Check that calling top_authors on a graph that has not been fit
    emits a UserWarning carrying the expected message.

    :return: None
    """

    expected = (
        "AuthorRank must first be fit on a set of documents "
        "prior to calling top_authors."
    )

    # deliberately skip fitting
    graph = ar.Graph()

    # requesting the ranking must raise at least one UserWarning
    with pytest.warns(UserWarning) as captured:
        graph.top_authors(normalize_scores=True)

    # the expected text must be among the captured warning messages
    raised = [warning.message.args[0] for warning in captured]
    assert expected in raised
# imports
import author_rank as ar
import json

# Example: fit AuthorRank to the sample author network and print the
# exported graph as indented JSON.

# load the sample documents from disk
with open("../data/author_network.json", "r") as f:
    data = json.loads(f.read())

# build the graph and fit it to the sample documents
ar_graph = ar.Graph()
ar_graph.fit(documents=data["documents"])

# export the fitted graph and pretty-print it
export = ar_graph.as_json()
print(json.dumps(export, indent=4))