Example #1
0
def add_genome_context(orig_file, data):
    """Annotate a VCF with genome context regions by running vcfanno.

    A vcfanno TOML configuration is written next to the transactional output
    file. It holds one ``[[annotation]]`` section for every genome context
    file reporting at least four fields, followed by a ``[[postannotation]]``
    section that concatenates those annotations into ``genome_context``.
    The annotation step is skipped when ``utils.file_uptodate`` reports the
    output as current with respect to ``orig_file``.

    Returns whatever ``vcfutils.bgzip_and_index`` returns for the
    ``<orig_file base>-context.vcf.gz`` output path.
    """
    out_file = "%s-context.vcf.gz" % utils.splitext_plus(orig_file)[0]
    if utils.file_uptodate(out_file, orig_file):
        return vcfutils.bgzip_and_index(out_file, data["config"])
    with file_transaction(data, out_file) as tx_out_file:
        config_file = "%s.toml" % (utils.splitext_plus(tx_out_file)[0])
        with open(config_file, "w") as toml_handle:
            context_names = []
            for context_bed in dd.get_genome_context_files(data):
                # The annotation value is read from column 4, so files with
                # fewer fields are left out of the configuration.
                if pybedtools.BedTool(context_bed).field_count() < 4:
                    continue
                parent_dir, bed_base = os.path.split(context_bed)
                dir_label = os.path.basename(parent_dir)
                # Name each annotation after its parent directory and file stem.
                name = "%s_%s" % (dir_label, utils.splitext_plus(bed_base)[0])
                toml_handle.write("".join([
                    "[[annotation]]\n",
                    'file = "%s"\n' % context_bed,
                    "columns = [4]\n",
                    'names = ["%s"]\n' % name,
                    'ops = ["uniq"]\n',
                ]))
                context_names.append(name)
            quoted_names = ", ".join('"%s"' % n for n in context_names)
            toml_handle.write("".join([
                "[[postannotation]]\n",
                "fields = [%s]\n" % quoted_names,
                'name = "genome_context"\n',
                'op = "concat"\n',
                'type = "String"\n',
            ]))
        cmd = "vcfanno {config_file} {orig_file} | bgzip -c > {tx_out_file}"
        do.run(cmd.format(config_file=config_file, orig_file=orig_file,
                          tx_out_file=tx_out_file),
               "Annotate with problem annotations", data)
    return vcfutils.bgzip_and_index(out_file, data["config"])
Example #2
0
def add_genome_context(orig_file, data):
    """Run vcfanno to attach genome context annotations to a variant file.

    Builds a TOML configuration for vcfanno inside the file transaction:
    each genome context file with four or more fields contributes an
    ``[[annotation]]`` entry taken from column 4, and a closing
    ``[[postannotation]]`` entry concatenates all of them under the
    ``genome_context`` name. vcfanno output is piped through bgzip into the
    transactional output file. Nothing is regenerated when
    ``utils.file_uptodate`` considers the existing output current.

    Returns the value of ``vcfutils.bgzip_and_index`` for the
    ``-context.vcf.gz`` file derived from ``orig_file``.
    """
    annotation_tmpl = ('[[annotation]]\n'
                       'file = "%s"\n'
                       'columns = [4]\n'
                       'names = ["%s"]\n'
                       'ops = ["uniq"]\n')
    postannotation_tmpl = ('[[postannotation]]\n'
                           'fields = [%s]\n'
                           'name = "genome_context"\n'
                           'op = "concat"\n'
                           'type = "String"\n')
    base_name = utils.splitext_plus(orig_file)[0]
    out_file = "%s-context.vcf.gz" % base_name
    if not utils.file_uptodate(out_file, orig_file):
        with file_transaction(data, out_file) as tx_out_file:
            tx_base = utils.splitext_plus(tx_out_file)[0]
            config_file = "%s.toml" % (tx_base)
            with open(config_file, "w") as cfg:
                annotated = []
                for bed_path in dd.get_genome_context_files(data):
                    bed = pybedtools.BedTool(bed_path)
                    # Only files that actually carry a fourth column can be
                    # used, since that column provides the annotation.
                    has_label_column = bed.field_count() >= 4
                    if has_label_column:
                        folder, filename = os.path.split(bed_path)
                        folder_name = os.path.split(folder)[1]
                        stem = utils.splitext_plus(filename)[0]
                        label = "%s_%s" % (folder_name, stem)
                        cfg.write(annotation_tmpl % (bed_path, label))
                        annotated.append(label)
                field_list = ", ".join(['"%s"' % label for label in annotated])
                cfg.write(postannotation_tmpl % field_list)
            cmd = "vcfanno {config_file} {orig_file} | bgzip -c > {tx_out_file}"
            cmd = cmd.format(config_file=config_file, orig_file=orig_file,
                             tx_out_file=tx_out_file)
            do.run(cmd, "Annotate with problem annotations", data)
    return vcfutils.bgzip_and_index(out_file, data["config"])