def test_premade_alignment(self):
    """Exercise ``skip_alignment=True`` on rejected and accepted inputs.

    Three ``hivtrace.hivtrace`` calls are made, all with alignment skipped:

    1. ``self.fn`` with LANL comparison requested -- must raise.
    2. ``self.fn`` without LANL comparison -- must raise.
    3. ``self.aligned_fn`` without LANL comparison -- must return a result
       that contains the ``'trace_results'`` key.
    """
    compare_to_lanl = True
    input_fn = self.fn
    reference = self.reference
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(input_fn)

    # Ensure that you cannot compare to lanl if skipping alignment
    with self.assertRaises(Exception):
        hivtrace.hivtrace(
            job_id,
            input_fn,
            reference,
            self.ambiguities,
            self.distance_threshold,
            self.min_overlap,
            compare_to_lanl,
            '0.015',
            handle_contaminants='remove',
            filter_edges='remove',
            skip_alignment=True)

    # Ensure that unequal sequence lengths fail
    # (presumably self.fn holds unaligned sequences -- confirm in setUp).
    with self.assertRaises(Exception):
        hivtrace.hivtrace(
            job_id,
            input_fn,
            reference,
            self.ambiguities,
            self.distance_threshold,
            self.min_overlap,
            False,
            '0.015',
            handle_contaminants='remove',
            filter_edges='remove',
            skip_alignment=True)

    # The pre-aligned file must be accepted when alignment is skipped.
    results = hivtrace.hivtrace(
        job_id,
        self.aligned_fn,
        reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        False,
        '0.015',
        handle_contaminants='remove',
        filter_edges='remove',
        skip_alignment=True)

    self.assertIn('trace_results', results)
def test_env(self):
    """Smoke-test a full ``hivtrace.hivtrace`` run on ``self.env_fn``.

    The run uses the ``'HXB2_env'`` reference. Nothing about the output is
    asserted: the test passes when the call returns without raising.
    """
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(self.env_fn)
    reference = 'HXB2_env'

    # Run the whole thing; any exception raised here fails the test.
    hivtrace.hivtrace(
        job_id,
        self.env_fn,
        reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        False,
        '0.015')
def test_env(self):
    """Smoke-test a full ``hivtrace.hivtrace`` run on ``self.env_fn``.

    The run uses the ``'HXB2_env'`` reference with LANL comparison disabled
    (``False`` is passed positionally). Nothing about the output is
    asserted: the test passes when the call returns without raising.
    """
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(self.env_fn)
    reference = 'HXB2_env'

    # Run the whole thing; any exception raised here fails the test.
    hivtrace.hivtrace(
        job_id,
        self.env_fn,
        reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        False,
        '0.015')
def test_contaminant_screening_separately(self):
    """Check ``handle_contaminants='separately'`` reports contaminants.

    Runs ``hivtrace.hivtrace`` on ``rsrc/CONTAM.fasta`` against the
    ``rsrc/HXB2_1497.fasta`` reference with alignment skipped, then asserts
    that ``"contaminant_sequences"`` appears in the ``"Network Summary"``
    of ``trace_results``.
    """
    # Resource files live next to this test module.
    this_dirname = os.path.dirname(os.path.realpath(__file__))
    input_fn = path.join(this_dirname, 'rsrc/CONTAM.fasta')
    reference = path.join(this_dirname, 'rsrc/HXB2_1497.fasta')
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(input_fn)

    results = hivtrace.hivtrace(
        job_id,
        input_fn,
        reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        False,
        '0.015',
        handle_contaminants='separately',
        filter_edges='remove',
        skip_alignment=True)

    self.assertIn("contaminant_sequences",
                  results["trace_results"]["Network Summary"])
def test_contaminant_annotations(self):
    """Check ``handle_contaminants='report'`` annotates HXB2 nodes.

    Runs ``hivtrace.hivtrace`` on ``rsrc/CONTAM.fasta`` with alignment
    skipped, selects every node whose ``"id"`` contains ``"HXB2"`` and
    asserts that each has ``'problematic'`` in its ``"attributes"``.
    """
    # Resource files live next to this test module.
    this_dirname = os.path.dirname(os.path.realpath(__file__))
    input_fn = path.join(this_dirname, 'rsrc/CONTAM.fasta')
    reference = self.reference
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(input_fn)

    results = hivtrace.hivtrace(
        job_id,
        input_fn,
        reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        False,
        '0.015',
        handle_contaminants='report',
        filter_edges='remove',
        skip_alignment=True)

    hxb2_nodes = [
        node for node in results["trace_results"]["Nodes"]
        if "HXB2" in node["id"]
    ]

    # NOTE(review): all() is vacuously true for an empty list, so this
    # passes when no HXB2 node is present -- confirm that is acceptable.
    self.assertTrue(
        all('problematic' in node["attributes"] for node in hxb2_nodes))
def test_keep_singletons(self):
    """Check that the trace results expose a ``'Singletons'`` entry.

    Runs ``hivtrace.hivtrace`` on ``rsrc/TEST2.FASTA`` with alignment
    skipped and ``handle_contaminants='report'``, then asserts that
    ``'Singletons'`` is a key of ``trace_results``.
    """
    # Resource files live next to this test module.
    this_dirname = os.path.dirname(os.path.realpath(__file__))
    input_fn = path.join(this_dirname, 'rsrc/TEST2.FASTA')
    reference = self.reference
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(input_fn)

    results = hivtrace.hivtrace(
        job_id,
        input_fn,
        reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        False,
        '0.015',
        handle_contaminants='report',
        filter_edges='remove',
        skip_alignment=True)

    self.assertIn('Singletons', results['trace_results'])
def test_strip_reference_sequences(self):
    """Check edge and node output of a default run on ``self.fn``.

    Runs ``hivtrace.hivtrace`` without LANL comparison at threshold
    ``'0.025'`` and asserts that every edge contains ``"removed"`` and that
    no node contains either of the two known contaminant identifiers.
    """
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(self.fn)
    compare_to_lanl = False

    # Run the whole thing; any exception raised here fails the test.
    results = hivtrace.hivtrace(
        job_id,
        self.fn,
        self.reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        compare_to_lanl,
        '0.025')

    # Plain loops: the assertions are side effects, not values to collect.
    for edge in results["trace_results"]["Edges"]:
        self.assertIn("removed", edge)

    known_contaminants = [
        'B|FR|A04321|1983',
        '08_BC_HXB2_SABOTAGE|CN|AB078686|2000',
    ]

    # NOTE(review): `k in node` tests membership on the node object itself.
    # If nodes are dicts (other tests read node["id"]), this checks dict
    # keys rather than the node id -- confirm whether node["id"] was meant.
    for node in results["trace_results"]["Nodes"]:
        self.assertFalse(any(k in node for k in known_contaminants))
def test_empty_contaminants(self):
    """Smoke-test a run with ``handle_contaminants='remove'`` on ``self.fn``.

    Nothing about the output is asserted: the test passes when
    ``hivtrace.hivtrace`` returns without raising.
    """
    input_fn = self.fn
    reference = self.reference
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(input_fn)

    # Run the whole thing; any exception raised here fails the test.
    hivtrace.hivtrace(
        job_id,
        input_fn,
        reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        False,
        '0.015',
        handle_contaminants='remove',
        filter_edges='remove')
def test_keep_singletons(self):
    """Check that the trace results expose a ``'Singletons'`` entry.

    Runs ``hivtrace.hivtrace`` on ``rsrc/TEST2.FASTA`` with alignment
    skipped and ``handle_contaminants='report'``, then asserts that
    ``'Singletons'`` is a key of ``trace_results``.
    """
    # Resource files live next to this test module.
    this_dirname = os.path.dirname(os.path.realpath(__file__))
    input_fn = path.join(this_dirname, 'rsrc/TEST2.FASTA')
    reference = self.reference
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(input_fn)

    results = hivtrace.hivtrace(
        job_id,
        input_fn,
        reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        False,
        '0.015',
        handle_contaminants='report',
        filter_edges='remove',
        skip_alignment=True)

    self.assertIn('Singletons', results['trace_results'])
def test_contaminant_annotations(self):
    """Check ``handle_contaminants='report'`` annotates HXB2 nodes.

    Runs ``hivtrace.hivtrace`` on ``rsrc/CONTAM.fasta`` with alignment
    skipped, selects every node whose ``"id"`` contains ``"HXB2"`` and
    asserts that each has ``'problematic'`` in its ``"attributes"``.
    """
    # Resource files live next to this test module.
    this_dirname = os.path.dirname(os.path.realpath(__file__))
    input_fn = path.join(this_dirname, 'rsrc/CONTAM.fasta')
    reference = self.reference
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(input_fn)

    results = hivtrace.hivtrace(
        job_id,
        input_fn,
        reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        False,
        '0.015',
        handle_contaminants='report',
        filter_edges='remove',
        skip_alignment=True)

    hxb2_nodes = [
        node for node in results["trace_results"]["Nodes"]
        if "HXB2" in node["id"]
    ]

    # NOTE(review): all() is vacuously true for an empty list, so this
    # passes when no HXB2 node is present -- confirm that is acceptable.
    self.assertTrue(
        all('problematic' in node["attributes"] for node in hxb2_nodes))
def test_hivtrace_lanl(self):
    """Check the LANL-comparison output of a run on ``self.fn``.

    Runs ``hivtrace.hivtrace`` with LANL comparison enabled at threshold
    ``'0.025'`` and asserts the cluster, edge and node counts in the
    ``"Network Summary"`` of ``lanl_trace_results``, plus the exact set of
    top-level keys in ``lanl_trace_results``.
    """
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(self.fn)
    compare_to_lanl = True

    # The two trailing positionals (False, "report") are passed exactly as
    # before; their parameter names are not visible from this test --
    # TODO confirm against the hivtrace.hivtrace signature.
    results = hivtrace.hivtrace(
        job_id,
        self.fn,
        self.reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        compare_to_lanl,
        '0.025',
        False,
        "report")

    lanl_results = results["lanl_trace_results"]
    summary = lanl_results["Network Summary"]

    # assertEqual reports both values on failure, unlike assertTrue(a == b).
    self.assertEqual(summary["Clusters"], 2)
    self.assertEqual(summary["Edges"], 31)
    self.assertEqual(summary["Nodes"], 13)

    expected_keys = {
        'Cluster sizes',
        'Edge Stages',
        'Edges',
        'HIV Stages',
        'Network Summary',
        'Settings',
        'Degrees',
        'Directed Edges',
        'Multiple sequences',
        'Nodes',
    }
    self.assertEqual(set(lanl_results.keys()), expected_keys)
def test_strip_reference_sequences(self):
    """Check edge and node output of a default run on ``self.fn``.

    Runs ``hivtrace.hivtrace`` without LANL comparison at threshold
    ``'0.025'`` and asserts that every edge contains ``"removed"`` and that
    no node contains either of the two known contaminant identifiers.
    """
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(self.fn)
    compare_to_lanl = False

    # Run the whole thing; any exception raised here fails the test.
    results = hivtrace.hivtrace(
        job_id,
        self.fn,
        self.reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        compare_to_lanl,
        '0.025')

    # Plain loops: the assertions are side effects, not values to collect.
    for edge in results["trace_results"]["Edges"]:
        self.assertIn("removed", edge)

    known_contaminants = [
        'B|FR|A04321|1983',
        '08_BC_HXB2_SABOTAGE|CN|AB078686|2000',
    ]

    # NOTE(review): `k in node` tests membership on the node object itself.
    # If nodes are dicts (other tests read node["id"]), this checks dict
    # keys rather than the node id -- confirm whether node["id"] was meant.
    for node in results["trace_results"]["Nodes"]:
        self.assertFalse(any(k in node for k in known_contaminants))
def test_strip_drams(self):
    """Check ``strip_drams`` masking and a full run with DRAM stripping.

    For the ``'lewis'`` and ``'wheeler'`` lists, successive records yielded
    by ``strip_drams(self.fn, ...)`` are inspected: element ``[1]`` of each
    record is sliced at fixed offsets and compared with ``'---'``. Finally
    ``hivtrace.hivtrace`` is run with ``'lewis'`` as the trailing positional
    argument and must return a truthy ``"trace_results"``.
    """
    # Each next() call consumes one record, so the assertions below look at
    # the first, second and third records in turn -- not the same record.
    lewis = strip_drams(self.fn, 'lewis')
    self.assertEqual(next(lewis)[1][120:123], '---')
    self.assertEqual(next(lewis)[1][672:675], '---')
    self.assertNotEqual(next(lewis)[1][687:690], '---')

    wheeler = strip_drams(self.fn, 'wheeler')
    self.assertEqual(next(wheeler)[1][129:132], '---')
    self.assertEqual(next(wheeler)[1][687:690], '---')

    # Fix: the job identifier was never assigned in this test, so the
    # builtin id() function was being passed. Derive it from the input file
    # name, as the sibling tests do.
    job_id = os.path.basename(self.fn)

    # The trailing 'lewis' is presumably the DRAM list to strip during the
    # run -- TODO confirm against the hivtrace.hivtrace signature.
    results = hivtrace.hivtrace(
        job_id,
        self.fn,
        self.reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        False,
        '0.025',
        'lewis')

    self.assertTrue(results["trace_results"])
def test_custom_reference(self):
    """Check a run on ``self.fn`` against ``self.custom_reference``.

    Runs ``hivtrace.hivtrace`` without LANL comparison (``False`` is passed
    positionally) at threshold ``'0.015'`` and asserts that no node
    contains either of the two known contaminant identifiers.
    """
    input_fn = self.fn
    reference = self.custom_reference
    # Named job_id (not id) so the builtin is not shadowed.
    job_id = os.path.basename(input_fn)

    # Run the whole thing; any exception raised here fails the test.
    results = hivtrace.hivtrace(
        job_id,
        input_fn,
        reference,
        self.ambiguities,
        self.distance_threshold,
        self.min_overlap,
        False,
        '0.015')

    # NOTE(review): an earlier assignment of ['Z|JP|K03455|2036|7'] to this
    # name was overwritten before use and has been dropped. Confirm whether
    # that identifier was meant to be checked as well.
    known_contaminants = [
        'B|FR|A04321|1983',
        '08_BC_HXB2_SABOTAGE|CN|AB078686|2000',
    ]

    # NOTE(review): `k in node` tests membership on the node object itself.
    # If nodes are dicts, this checks dict keys rather than the node id --
    # confirm whether node["id"] was meant.
    for node in results["trace_results"]["Nodes"]:
        self.assertFalse(any(k in node for k in known_contaminants))