def test_write_to_read_from_stream(self):
    """Round-trip the sample file through an in-memory stream.

    The records of SAMPLE_FILE are written to a StringIO buffer, the buffer
    is rewound and parsed again, and the metadata, header definitions and
    records of the re-parsed result are checked.
    """
    source = parser.Reader(SAMPLE_FILE).read_records()
    buffer = StringIO()
    stream_writer = parser.Writer(buffer, source)
    for entry in source:
        stream_writer.write_record(entry)

    # rewind so the second reader starts at the beginning of the buffer
    buffer.seek(0)
    aavf = parser.Reader(buffer).read_records()
    buffer.close()
    record_list = list(aavf)

    assert isinstance(aavf, AAVF)

    # (metadata key, expected value, failure message template)
    expectations = (
        ("fileformat", "AAVFv1.0",
         "fileformat should be AAVFv1.0, metadata is %s"),
        ("fileDate", "20180501",
         "filedate should be 20180501, metadata is %s"),
        ("source", "myProgramV1.0",
         "source should be myProgramV1.0, metadata is %s"),
        ("reference", ["hxb2.fas"],
         "reference list should be [hxb2.fas], metadata is %s"),
    )
    for key, wanted, template in expectations:
        assert aavf.metadata.get(key) == wanted, template % aavf.metadata

    assert aavf.infos
    assert aavf.filters

    # the sample file holds seven data lines, each parsed into a Record
    assert len(record_list) == 7
    assert all(isinstance(entry, Record) for entry in record_list)
def test_write_and_format_decimals(self):
    """Check that ALT_FREQ honours the ``decimals`` argument when written.

    For 3, 4 and 5 decimals the sample file is written to
    sampleoutput4.aavf, read back, and every record is compared with the
    matching record of the untouched sample file.
    """
    for precision in range(3, 6):
        source = parser.Reader(SAMPLE_FILE).read_records()
        handle = fhandle('sampleoutput4.aavf', "w+")
        writer = parser.Writer(handle, source)
        for entry in list(source):
            writer.write_record(entry, decimals=precision)
        handle.close()

        written = parser.Reader(
            TEST_PATH + '/sampleoutput4.aavf').read_records()
        pristine = parser.Reader(SAMPLE_FILE).read_records()
        writer.close()

        # the value read back must equal the original frequency rounded
        # to ``precision`` places; INFO must be untouched
        for left, right in zip(written, pristine):
            assert left.INFO == right.INFO
            rounded = round(right.ALT_FREQ, precision)
            assert left.ALT_FREQ == rounded, \
                "%s and %s should be the same up to the %dth decimal place" % (
                    left.ALT_FREQ, right.ALT_FREQ, precision)
def test_write(self):
    """Verify that INFO fields survive a write to sampleoutput2.aavf."""
    source = parser.Reader(SAMPLE_FILE).read_records()
    handle = fhandle('sampleoutput2.aavf', "w+")
    writer = parser.Writer(handle, source)
    for entry in list(source):
        writer.write_record(entry)
    writer.flush()
    writer.close()

    written_path = TEST_PATH + "/sampleoutput2.aavf"

    # fresh readers: one over the sample input, one over the written copy
    originals = parser.Reader(SAMPLE_FILE).read_records()
    rewritten = parser.Reader(written_path).read_records()

    # walk both in lockstep and compare the INFO of each pair
    for left, right in zip(originals, rewritten):
        assert left.INFO == right.INFO, \
            "left.INFO is %s and right.INFO is %s" % (left.INFO, right.INFO)
def test_walk(self):
    """
    Feed three readers over the same sample file to utils.walk_together
    and check that every step yields three equal records, seven steps in
    total.
    """
    # easy case: all same sites
    readers = [parser.Reader(SAMPLE_FILE).read_records() for _ in range(3)]

    steps = 0
    for trio in utils.walk_together(*readers):
        assert len(trio) == 3
        assert trio[0] == trio[1] == trio[2]
        steps += 1

    assert steps == 7
def test_parse(self):
    """Check the parsed INFO values of the first record in the sample."""
    record = next(parser.Reader(SAMPLE_FILE).read_records())

    assert record.INFO['RC'] == 'tca', \
        "record.INFO['RC'] should be 'tca'" + \
        ", record.INFO is %s" % record.INFO

    # AC and ACF are RESERVED_INFO fields whose number of values varies or
    # is unbounded, so the parser hands them back as lists.
    expected_lists = (('AC', ['tAa']), ('ACF', [0.0031]))
    for key, wanted in expected_lists:
        assert record.INFO[key] == wanted
def test_write_to_file(self):
    """Test whether writes to file work as expected.

    The sample file is written to sampleoutput3.aavf and read back; the
    copy must contain as many records as the sample and each record's INFO
    must match its counterpart.
    """
    source = parser.Reader(SAMPLE_FILE).read_records()
    out = fhandle('sampleoutput3.aavf', "w+")
    try:
        writer = parser.Writer(out, source)
        for record in list(source):
            writer.write_record(record)
    finally:
        # close even when a write fails so the handle is not leaked
        out.close()

    # Materialise both sides once. The parsed result is consumed by
    # iteration, so counting the records with list() and then iterating the
    # same object again would compare nothing.
    written = list(
        parser.Reader(TEST_PATH + '/sampleoutput3.aavf').read_records())
    expected = list(parser.Reader(SAMPLE_FILE).read_records())

    # all data lines should be read from the sample file
    assert len(written) == len(expected)
    for left, right in zip(written, expected):
        assert left.INFO == right.INFO
def test_aavf_1_0(self):
    """Parse the AAVF 1.0 sample and check its format tag and filters."""
    aavf = parser.Reader(SAMPLE_FILE).read_records()

    has_format = 'fileformat' in aavf.metadata.keys()
    assert has_format, "Metadata should contain fileformat," + \
        "metadata dict is %s" % aavf.metadata.items()
    assert aavf.metadata['fileformat'] == 'AAVFv1.0'

    # walking the file: only the record at position 103 is expected to be
    # unfiltered, every other record must be filtered
    for entry in aavf:
        expect_unfiltered = entry.POS == 103
        assert bool(entry.is_filtered) != expect_unfiltered
def summarize_sample_calls(in_file, aavf_files):
    """Write a single comma-separated summary of calls from AAVF inputs.

    The output is named ``<basename of in_file without extension>-calls.csv``
    and is created in the current working directory.

    Args:
        in_file: path whose base name is used to name the output file.
        aavf_files: iterable of ``(sample, aavf_file)`` pairs; each
            ``aavf_file`` is handed to ``parser.Reader``.

    Returns:
        The path of the CSV file that was written.
    """
    base_name = os.path.splitext(os.path.basename(in_file))[0]
    out_file = os.path.join(os.getcwd(), "%s-calls.csv" % base_name)
    with open(out_file, "w") as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow(["sample", "gene", "pos", "ref", "alt", "altfreq",
                         "altcount", "coverage", "filter", "drug", "drugcat"])
        for sample, aavf_file in aavf_files:
            for rec in parser.Reader(aavf_file).read_records():
                # reduce() without an initial value raises TypeError on an
                # empty sequence, so a record without ACC counts as 0.
                acc_values = rec.INFO.get("ACC", [])
                if acc_values:
                    alt_count = int(
                        functools.reduce(operator.add, acc_values))
                else:
                    alt_count = 0
                # Values are emitted in header order: the alt count goes
                # under "altcount" and the coverage under "coverage".
                writer.writerow([
                    sample,
                    rec.GENE,
                    rec.POS,
                    rec.REF,
                    ";".join(rec.ALT),
                    rec.ALT_FREQ,
                    alt_count,
                    rec.COVERAGE,
                    ";".join(rec.FILTER) if rec.FILTER else "pass",
                    ";".join(rec.INFO.get("DRUG", [])),
                    ";".join(rec.INFO.get("CAT", [])),
                ])
    return out_file
def output_resistance_levels(aavf_file, xml_file, output_path):
    """
    Parse aavf file to AAVF object.
    Create XmlAsiTransformer.
    Use transformer to evaluate drug resistance levels in the
    AAVF object mutations.
    Output drug resistance.

    Args:
        aavf_file: AAVF input handed to ``parser.Reader``.
        xml_file: path of the XML file handed to ``XmlAsiTransformer``.
        output_path: either a path string, which is opened with "w+", or an
            already-open writable file object. In both cases the file is
            closed before this function returns.

    Returns:
        The text that was written: a header line followed by one
        ``gene,drug class,drug,resistance level`` line per evaluated
        condition.
    """
    records = list(parser.Reader(aavf_file).read_records())

    # NOTE(review): assumes transform() fully reads the XML before returning,
    # so the handle can be closed straight away -- confirm.
    with open(xml_file, "r") as xml_handle:
        genes = XmlAsiTransformer(True).transform(xml_handle)

    output_file = output_path

    # handles the case where the output_path was passed from somewhere
    # other than to click, from the command line,
    # e.g. from a test file
    if isinstance(output_path, str):
        output_file = open(output_path, "w+")

    try:
        # group "<POS><ALT>" mutation strings by gene, skipping "*" alleles
        mutations = defaultdict(list)
        for record in records:
            if record.ALT[0] != "*":
                mutations[record.GENE].append(
                    "%s%s" % (record.POS, record.ALT[0]))

        lines = ["#gene,drug class,drug,resistance level"]
        for gene, gene_mutations in mutations.items():
            evaluated_gene = genes[gene].evaluate(
                gene_mutations, StringMutationComparator(True))
            for drug_class in evaluated_gene.get_evaluated_drug_classes():
                for drug in drug_class.get_evaluated_drugs():
                    for condition in drug.get_evaluated_conditions():
                        # only the first definition of a condition is used
                        definition = next(iter(condition.get_definitions()))
                        lines.append("%s,%s,%s,%s" % (
                            gene,
                            drug_class.get_drug_class().name,
                            drug.get_drug().name,
                            definition.get_text()))

        output_string = "\n".join(lines)
        output_file.write(output_string)
    finally:
        # release the handle on failure as well as on success
        output_file.close()

    return output_string
def test_read_from_stream(self):
    """Test whether reads from stream work as expected and if the AAVF
    record object returned is correct."""
    # open the sample through a context manager so the stream is closed
    # even when parsing fails
    with open(SAMPLE_FILE, "r") as stream:
        aavf = parser.Reader(stream).read_records()
        # pull the records while the stream is still open
        record_list = list(aavf)

    assert isinstance(aavf, AAVF)
    assert aavf.metadata.get("fileformat") == "AAVFv1.0", \
        "fileformat should be AAVFv1.0, metadata is %s" % aavf.metadata
    assert aavf.metadata.get("fileDate") == "20180501", \
        "filedate should be 20180501, metadata is %s" % aavf.metadata
    assert aavf.metadata.get("source") == "myProgramV1.0", \
        "source should be myProgramV1.0, metadata is %s" % aavf.metadata
    assert aavf.metadata.get("reference") == ["hxb2.fas"], \
        "reference list should be [hxb2.fas], metadata is %s" % aavf.metadata
    assert aavf.infos
    assert aavf.filters

    # the sample file holds seven data lines, each parsed into a Record
    assert len(record_list) == 7
    for record in record_list:
        assert isinstance(record, Record)
def test_writer(self):
    """
    INFO keys on each written data line must follow the order in which
    they are defined in the header; keys without a definition come last,
    in alphabetical order. The ordering check itself is delegated to
    self._assert_order.
    """
    source = parser.Reader(SAMPLE_FILE).read_records()
    buffer = StringIO()
    writer = parser.Writer(buffer, source)
    for entry in source:
        writer.write_record(entry)
    buffer.seek(0)
    written_text = buffer.getvalue()
    buffer.close()

    declared_ids = []
    for row in written_text.split('\n'):
        if row.startswith('##INFO='):
            # header definition: remember its ID in declaration order
            info_id = row.split('ID=')[1].split(',')[0]
            declared_ids.append(info_id)
            continue
        if not row or row.startswith('#'):
            continue
        # NOTE(review): column index 7 is taken to be the INFO column --
        # confirm against the column layout the writer emits.
        info_column = row.split('\t')[7]
        keys = [item.partition('=')[0] for item in info_column.split(';')]
        self._assert_order(declared_ids, keys)
def annotate_aavf(in_file, hivdb_file, out_file):
    """Annotate an AAVF input file with drug resistance changes.

    Registers two extra INFO definitions, CAT and DRUG, on the parsed
    input, fills them in for every record that ``evaluate_resistance``
    reports under the key ``(GENE, "<REF><POS><ALT[0]>")``, and writes all
    records to ``out_file``.

    Args:
        in_file: AAVF input handed to ``parser.Reader``.
        hivdb_file: path of the XML file handed to ``XmlAsiTransformer``.
        out_file: path of the annotated AAVF file to create.
    """
    # NOTE(review): assumes transform() fully reads the XML before returning,
    # so the handle can be closed straight away -- confirm.
    with open(hivdb_file, "r") as hivdb_handle:
        res_genes = XmlAsiTransformer(True).transform(hivdb_handle)

    aavf_obj = parser.Reader(in_file).read_records()
    aavf_obj.infos["CAT"] = model.Info(
        "CAT", ".", "String", "Drug resistance category", None, None)
    # NOTE(review): "reistances" looks like a typo, but the text is written
    # to the output header, so it is left unchanged here.
    aavf_obj.infos["DRUG"] = model.Info(
        "DRUG", ".", "String", "Drug reistances", None, None)

    with open(out_file, "w") as out_handle:
        writer = parser.Writer(out_handle, aavf_obj)
        for rec in aavf_obj:
            # NOTE(review): this tests the position against the ALT entries;
            # the intent is unclear from here -- confirm what should be
            # skipped.
            if rec.POS not in rec.ALT:
                rmuts = evaluate_resistance([rec], res_genes)
                k = (rec.GENE, "%s%s%s" % (rec.REF, rec.POS, rec.ALT[0]))
                by_category = rmuts.get(k)
                if by_category:
                    cats = list(by_category.keys())
                    drugs = []
                    for cat in cats:
                        drugs.extend(by_category[cat])
                    rec.INFO["CAT"] = cats
                    rec.INFO["DRUG"] = drugs
            # every record is written, annotated or not
            writer.write_record(rec)