Ejemplo n.º 1
0
    def test_write_to_read_from_stream(self):
        """Round-trip the sample file through an in-memory stream.

        Every record read from SAMPLE_FILE is written to a StringIO buffer,
        the buffer is parsed again, and the header metadata, the record count
        and the record types of the re-read data are checked.
        """
        source = parser.Reader(SAMPLE_FILE).read_records()
        out = StringIO()
        try:
            writer = parser.Writer(out, source)
            for record in source:
                writer.write_record(record)

            out.seek(0)
            aavf = parser.Reader(out).read_records()
            # Consume the records while the buffer is still open, so the test
            # does not depend on whether the reader parses eagerly or lazily.
            record_list = list(aavf)
        finally:
            out.close()

        assert isinstance(aavf, AAVF)

        assert aavf.metadata.get("fileformat") == "AAVFv1.0", \
               "fileformat should be AAVFv1.0, metadata is %s" % aavf.metadata
        assert aavf.metadata.get("fileDate") == "20180501", \
               "filedate should be 20180501, metadata is %s" % aavf.metadata
        assert aavf.metadata.get("source") == "myProgramV1.0", \
               "source should be myProgramV1.0, metadata is %s" % aavf.metadata
        assert aavf.metadata.get("reference") == ["hxb2.fas"], \
               "reference list should be [hxb2.fas], metadata is %s" % aavf.metadata
        assert aavf.infos
        assert aavf.filters

        assert len(record_list) == 7
        # every re-read data line must have been parsed into a Record
        for record in record_list:
            assert isinstance(record, Record)
Ejemplo n.º 2
0
    def test_write_and_format_decimals(self):
        """Test whether writes to file work with specifying a certain number
           of decimals for the ALT_FREQ field output as expected."""
        output_path = TEST_PATH + '/sampleoutput4.aavf'

        for num_dec in range(3, 6):
            aavf_obj = parser.Reader(SAMPLE_FILE).read_records()
            out = fhandle('sampleoutput4.aavf', "w+")
            writer = parser.Writer(out, aavf_obj)

            for record in list(aavf_obj):
                writer.write_record(record, decimals=num_dec)

            out.close()
            # Materialise both sides so the record counts can be compared;
            # zip() alone would silently stop at the shorter input and let an
            # empty or truncated output file pass.
            written = list(parser.Reader(output_path).read_records())
            expected = list(parser.Reader(SAMPLE_FILE).read_records())
            writer.close()

            assert len(written) == len(expected), \
                "read back %d records, expected %d" % (len(written), len(expected))

            # each ALT_FREQ read back should equal the source value rounded
            # to num_dec decimals, e.g. 0.123 if num_dec is three
            for left, right in zip(written, expected):
                assert left.INFO == right.INFO
                assert left.ALT_FREQ == round(right.ALT_FREQ, num_dec), \
                    "%s and %s should be the same up to the %dth decimal place" % (left.ALT_FREQ, right.ALT_FREQ, num_dec)
Ejemplo n.º 3
0
    def test_write(self):
        """Test whether the INFO section can be written correctly."""
        source = parser.Reader(SAMPLE_FILE).read_records()
        out = fhandle('sampleoutput2.aavf', "w+")
        writer = parser.Writer(out, source)

        for record in list(source):
            writer.write_record(record)
        writer.flush()
        writer.close()

        sample2 = TEST_PATH + "/sampleoutput2.aavf"

        # Read both files completely so the record counts can be compared;
        # zip() alone stops at the shorter input and would let an empty or
        # truncated output file pass.
        expected = list(parser.Reader(SAMPLE_FILE).read_records())
        written = list(parser.Reader(sample2).read_records())

        assert len(expected) == len(written), \
            "sample has %d records, written output has %d" % (len(expected), len(written))

        # compare sample file input and written output record by record
        for left, right in zip(expected, written):
            assert left.INFO == right.INFO, "left.INFO is %s and right.INFO is %s" \
                   % (left.INFO, right.INFO)
Ejemplo n.º 4
0
    def test_walk(self):
        """
        Feed three readers over the same sample file to walk_together and
        check that each step yields three records that compare equal.
        """
        # easy case: all same sites
        readers = [parser.Reader(SAMPLE_FILE).read_records() for _ in range(3)]

        steps = 0
        for steps, trio in enumerate(utils.walk_together(*readers), 1):
            assert len(trio) == 3
            first, second, third = trio
            assert first == second
            assert second == third
        # the sample file is expected to produce seven steps
        assert steps == 7
Ejemplo n.º 5
0
 def test_parse(self):
     """Check the INFO values parsed for the first record of the sample."""
     first = next(parser.Reader(SAMPLE_FILE).read_records())
     info = first.INFO
     assert info['RC'] == 'tca', \
         "record.INFO['RC'] should be 'tca', record.INFO is %s" % (info,)
     # The two RESERVED_INFO fields below can hold a varying or unbounded
     # number of values, so each is returned as a list.
     assert info['AC'] == ['tAa']
     assert info['ACF'] == [0.0031]
Ejemplo n.º 6
0
    def test_write_to_file(self):
        """Test whether writes to file work as expected."""
        aavf_obj = parser.Reader(SAMPLE_FILE).read_records()
        out = fhandle('sampleoutput3.aavf', "w+")
        writer = parser.Writer(out, aavf_obj)

        for record in list(aavf_obj):
            writer.write_record(record)

        out.close()

        # Materialise both sides exactly once. Counting a reader with
        # len(list(...)) exhausts it, so zipping that same reader afterwards
        # would compare nothing and the INFO check would never run.
        written = list(parser.Reader(TEST_PATH +
                                     '/sampleoutput3.aavf').read_records())
        expected = list(parser.Reader(SAMPLE_FILE).read_records())

        # all data lines should be read from the sample file
        assert len(written) == len(expected)

        for left, right in zip(written, expected):
            assert left.INFO == right.INFO
Ejemplo n.º 7
0
    def test_aavf_1_0(self):
        """Read the AAVF 1.0 sample and check its format tag and filters."""
        aavf_obj = parser.Reader(SAMPLE_FILE).read_records()
        metadata = aavf_obj.metadata

        assert 'fileformat' in metadata.keys(), "Metadata should contain fileformat," + \
               "metadata dict is %s" % metadata.items()

        assert metadata['fileformat'] == 'AAVFv1.0'

        # walking the records checks that the file can at least be iterated;
        # only the record at position 103 is expected to be unfiltered
        for line in aavf_obj:
            should_be_filtered = line.POS != 103
            assert bool(line.is_filtered) is should_be_filtered
Ejemplo n.º 8
0
def summarize_sample_calls(in_file, aavf_files):
    """Write one CSV row per AAVF record across all samples.

    The output file is named after ``in_file`` (its base name without the
    extension, followed by ``-calls.csv``) and is created in the current
    working directory. Values are written with ``csv.writer`` defaults.

    Args:
        in_file: Path whose base name is used to derive the output name.
        aavf_files: Iterable of ``(sample, aavf_file)`` pairs.

    Returns:
        Path of the CSV file that was written.
    """
    base_name = os.path.splitext(os.path.basename(in_file))[0]
    out_file = os.path.join(os.getcwd(), "%s-calls.csv" % base_name)
    with open(out_file, "w") as out_handle:
        writer = csv.writer(out_handle)
        writer.writerow(["sample", "gene", "pos", "ref", "alt", "altfreq", "altcount", "coverage",
                         "filter", "drug", "drugcat"])
        for sample, aavf_file in aavf_files:
            for rec in parser.Reader(aavf_file).read_records():
                # sum() gives 0 when a record has no ACC entry; reduce()
                # without an initial value raises TypeError on an empty list.
                alt_count = int(sum(rec.INFO.get("ACC", [])))
                # Values follow the header order: altcount comes before
                # coverage.
                writer.writerow([sample, rec.GENE, rec.POS, rec.REF, ";".join(rec.ALT),
                                 rec.ALT_FREQ, alt_count, rec.COVERAGE,
                                 ";".join(rec.FILTER) if rec.FILTER else "pass",
                                 ";".join(rec.INFO.get("DRUG", [])), ";".join(rec.INFO.get("CAT", []))])
    return out_file
Ejemplo n.º 9
0
def output_resistance_levels(aavf_file, xml_file, output_path):
    """Evaluate drug resistance for the mutations in an AAVF file.

    The AAVF records are grouped into per-gene mutation lists, each gene is
    evaluated against the definitions loaded from ``xml_file``, and the
    result is written as comma-separated lines.

    Args:
        aavf_file: AAVF input accepted by ``parser.Reader``.
        xml_file: Path of the XML file passed to ``XmlAsiTransformer``.
        output_path: Either a path string, which is opened with mode ``w+``,
            or an already open writable file object. In both cases the file
            is closed before returning.

    Returns:
        The text that was written to the output.
    """
    records = list(parser.Reader(aavf_file).read_records())

    # Close the XML handle as soon as the definitions are loaded.
    # NOTE(review): assumes transform() reads the handle fully before
    # returning -- confirm against XmlAsiTransformer.
    with open(xml_file, "r") as xml_handle:
        genes = XmlAsiTransformer(True).transform(xml_handle)

    # create mutations list, skipping records whose first ALT is "*"
    mutations = defaultdict(list)
    for record in records:
        if record.ALT[0] != "*":
            mutations[record.GENE].append("%s%s" % (record.POS, record.ALT[0]))

    lines = ["#gene,drug class,drug,resistance level"]
    for gene, gene_mutations in mutations.items():
        evaluated_gene = genes[gene].evaluate(gene_mutations,
                                              StringMutationComparator(True))

        for drug_class in evaluated_gene.get_evaluated_drug_classes():
            for drug in drug_class.get_evaluated_drugs():
                for condition in drug.get_evaluated_conditions():
                    # only the first definition of each condition is reported
                    definition = next(iter(condition.get_definitions()))

                    lines.append("%s,%s,%s,%s" %
                                 (gene, drug_class.get_drug_class().name,
                                  drug.get_drug().name,
                                  definition.get_text()))

    output_string = "\n".join(lines)

    # The output is opened only after the evaluation succeeded, so a failure
    # above neither truncates an existing file nor leaks a handle.
    if isinstance(output_path, str):
        with open(output_path, "w+") as output_file:
            output_file.write(output_string)
    else:
        # handles the case where an open file object was passed in, e.g. by
        # click from the command line
        try:
            output_path.write(output_string)
        finally:
            output_path.close()

    return output_string
Ejemplo n.º 10
0
    def test_read_from_stream(self):
        """Test whether reads from stream work as expected and if the AAVF
           record object returned is correct."""
        # The context manager closes the sample file once the records have
        # been read; the original left the handle open.
        with open(SAMPLE_FILE, "r") as stream:
            aavf = parser.Reader(stream).read_records()
            record_list = list(aavf)

        assert isinstance(aavf, AAVF)

        assert aavf.metadata.get("fileformat") == "AAVFv1.0", \
               "fileformat should be AAVFv1.0, metadata is %s" % aavf.metadata
        assert aavf.metadata.get("fileDate") == "20180501", \
               "filedate should be 20180501, metadata is %s" % aavf.metadata
        assert aavf.metadata.get("source") == "myProgramV1.0", \
               "source should be myProgramV1.0, metadata is %s" % aavf.metadata
        assert aavf.metadata.get("reference") == ["hxb2.fas"], \
               "reference list should be [hxb2.fas], metadata is %s" % aavf.metadata
        assert aavf.infos
        assert aavf.filters

        assert len(record_list) == 7
        # every data line must have been parsed into a Record
        for record in record_list:
            assert isinstance(record, Record)
Ejemplo n.º 11
0
    def test_writer(self):
        """
        INFO fields of each written record should follow the order in which
        they are defined in the header; undefined fields should come last, in
        alphabetical order.
        """
        aavf_obj = parser.Reader(SAMPLE_FILE).read_records()
        out = StringIO()
        writer = parser.Writer(out, aavf_obj)
        for record in aavf_obj:
            writer.write_record(record)

        out.seek(0)
        out_str = out.getvalue()
        out.close()

        definitions = []
        for line in out_str.split('\n'):
            if line.startswith('##INFO='):
                # collect the ID of every INFO definition in header order
                info_id = line.split('ID=')[1].split(',')[0]
                definitions.append(info_id)
                continue
            if not line or line.startswith('#'):
                continue
            # data line: the INFO column is the eighth tab-separated field
            info_column = line.split('\t')[7]
            fields = [entry.partition('=')[0] for entry in info_column.split(';')]
            self._assert_order(definitions, fields)
Ejemplo n.º 12
0
def annotate_aavf(in_file, hivdb_file, out_file):
    """Annotate an AAVF input file with drug resistance changes.

    ``CAT`` and ``DRUG`` INFO definitions are added to the header. For each
    record that ``evaluate_resistance`` reports a hit for, the matching
    categories and drugs are stored in those INFO fields and the record is
    written to ``out_file``.

    Args:
        in_file: AAVF input accepted by ``parser.Reader``.
        hivdb_file: Path of the XML file passed to ``XmlAsiTransformer``.
        out_file: Path of the AAVF file to write.
    """
    # Close the XML handle as soon as the definitions are loaded.
    # NOTE(review): assumes transform() reads the handle fully before
    # returning -- confirm against XmlAsiTransformer.
    with open(hivdb_file, "r") as hivdb_handle:
        res_genes = XmlAsiTransformer(True).transform(hivdb_handle)

    reader = parser.Reader(in_file)
    aavf_obj = reader.read_records()
    aavf_obj.infos["CAT"] = model.Info("CAT", ".", "String", "Drug resistance category", None, None)
    aavf_obj.infos["DRUG"] = model.Info("DRUG", ".", "String", "Drug reistances", None, None)
    with open(out_file, "w") as out_handle:
        writer = parser.Writer(out_handle, aavf_obj)
        for rec in aavf_obj:
            # NOTE(review): this compares the position against the ALT
            # values; confirm this is the intended check.
            if rec.POS not in rec.ALT:
                rmuts = evaluate_resistance([rec], res_genes)
                k = (rec.GENE, "%s%s%s" % (rec.REF, rec.POS, rec.ALT[0]))
                hits = rmuts.get(k)
                if hits:
                    cats = []
                    drugs = []
                    for cat, cat_drugs in hits.items():
                        cats.append(cat)
                        drugs.extend(cat_drugs)
                    rec.INFO["CAT"] = cats
                    rec.INFO["DRUG"] = drugs
                    # NOTE(review): only records with a resistance hit are
                    # written; confirm that dropping the others is intended.
                    writer.write_record(rec)