def assertEqualRecommendedData(self, expectedData, recommendedData, query):
    """Compare recommended items against expected items, including rank order.

    The specific numbers that come out of the recommender are not all
    verified; only the keys present in each expected item are compared
    (floats to 5 places, everything else for equality).  In addition, the
    values stored under ``query.sortField`` must be in descending
    (non-increasing) order across ``recommendedData``.

    Args:
        expectedData: Sized sequence of dict-like items with the values to
            verify.
        recommendedData: Sized sequence of dict-like items to check.  Each
            paired item is handed to
            ``ItemAssociationRecommender.populateDerivedStats`` before it is
            compared.
        query: Object whose ``sortField`` attribute names the key used for
            the ordering check.

    Raises:
        AssertionError: Raised by the ``assert*`` helpers on ``self`` when
            the sizes differ, a value does not match, or the sort-field
            values are not in descending order.
    """
    # Compare sizes first: zip() below stops at the shorter sequence, so a
    # missing or extra item should be reported before any per-item checks.
    self.assertEqual(len(expectedData), len(recommendedData))

    lastScore = None
    for expectedItem, recommendedItem in zip(expectedData, recommendedData):
        # Ensure derived statistics are populated to enable comparisons.
        # Pass a concrete list of keys (what .keys() returned on Python 2)
        # so the call behaves the same on Python 2 and Python 3.
        ItemAssociationRecommender.populateDerivedStats(
            recommendedItem, list(expectedItem))

        # NOTE(review): assertEqualDict is defined outside this block;
        # presumably it compares the two items on the listed key(s) - confirm.
        self.assertEqualDict(expectedItem, recommendedItem, ["clinical_item_id"])

        # If a value is specified in the expected item, verify it.
        # .items() (not .iterkeys()) works on both Python 2 and Python 3.
        for key, expectedValue in expectedItem.items():
            recommendedValue = recommendedItem[key]
            if isinstance(expectedValue, float):
                self.assertAlmostEqual(expectedValue, recommendedValue, 5)
            else:
                self.assertEqual(expectedValue, recommendedValue)

        # Verify descending order of scores
        score = recommendedItem[query.sortField]
        if lastScore is not None:
            self.assertTrue(
                score <= lastScore,
                "Items not in descending order by %s: %s follows %s"
                % (query.sortField, score, lastScore))
        lastScore = score
def assertEqualRecommendedDataStats(self, expectedData, recommendedData, headers):
    """Verify that recommended items carry the expected score / stat values.

    Here the actual values matter, not just relative ranking: after the
    sizes of the two sequences are compared, every field named in
    ``headers`` is checked on each paired item with ``assertAlmostEqual``
    using 3 as the places argument.

    Args:
        expectedData: Sized sequence of items indexable by each header,
            holding the reference values.
        recommendedData: Sized sequence of items to verify.  Each paired
            item is handed to
            ``ItemAssociationRecommender.populateDerivedStats`` together
            with ``headers`` before its fields are read.
        headers: Iterable of field names to populate and compare.

    Raises:
        AssertionError: Raised by the ``assert*`` helpers on ``self`` when
            the sizes differ or a field value does not match.
    """
    self.assertEqual(len(expectedData), len(recommendedData))

    messageTemplate = 'Dicts diff with key (%s). Verify = %s, Sample = %s'
    for reference, candidate in zip(expectedData, recommendedData):
        # Make sure the recommended item has all fields of interest
        # populated / calculated before they are read below.
        ItemAssociationRecommender.populateDerivedStats(candidate, headers)

        for field in headers:
            wanted = reference[field]
            actual = candidate[field]
            self.assertAlmostEqual(
                wanted, actual, 3, messageTemplate % (field, wanted, actual))
# Call ItemRecommender recommendations = recommender(query) # Output to csv file description = description.replace("/", ";") fname = str(clinical_item_id) + " " + str(description) + ".csv" outfname = open( "/Users/jwang/Desktop/Results/item_associations_expert_unmatched/" + fname, "w") outfname.write( "clinical_item_id,description,score,PPV,OR,prevalence,RR,P-YatesChi2\n" ) association_count = 0 for rec in recommendations: recommender.populateDerivedStats( rec, ["PPV", "OR", "prevalence", "RR", "P-YatesChi2"]) outfname.write("{0},{1},{2},{3},{4},{5},{6},{7}\n".format( rec["clinical_item_id"], id2description[str(rec["clinical_item_id"])], rec["score"], rec["PPV"], rec["OR"], rec["prevalence"], rec["RR"], rec["P-YatesChi2"])) association_count += 1 if (association_count == NUM_ASSOCIATIONS): break diagnosis_count += 1 if (diagnosis_count == NUM_DIAGNOSES): break # Add more stats: look at main function