コード例 #1
0
ファイル: mvf_filter.py プロジェクト: wum5/mvftools
def main(arguments=sys.argv[1:]):
    """Main method for mvf_filter.

    Parses the command line, builds the ordered action set and then either
    runs the actions on a single hand-typed line (``--test``) or streams
    every entry of the input MVF through the actions into the output MVF.

    Args:
        arguments: list of command-line tokens (defaults to ``sys.argv[1:]``
            as captured when this module was imported).

    Returns:
        An empty string when the main (file) mode completes.  ``--version``
        and ``--test`` end the process through ``sys.exit()`` instead.

    Raises:
        RuntimeError: if no input (``--mvf``/``--test``), no output
            (``--out``/``--test``) or no ``--actions`` were given.
    """
    parser = argparse.ArgumentParser(description="""
    Filters and Transforms MVF files""")
    parser.add_argument("--mvf", help="input MVF file")
    parser.add_argument("--out", help="output MVF file")
    parser.add_argument("--actions", nargs='*',
                        help=("set of actions:args to perform,"
                              " note these are done in order as listed"))
    parser.add_argument("--test", help="manually input a line for testing")
    parser.add_argument("--testnchar", type=int,
                        help="total number of samples for test string")
    parser.add_argument("--modulehelp", action="store_true",
                        help="prints full module list and descriptions")
    parser.add_argument("--linebuffer", type=int, default=100000,
                        help="number of lines to write at once to MVF")
    parser.add_argument("--verbose", action="store_true",
                        help="report every line (for debugging)")
    parser.add_argument("--overwrite", action="store_true",
                        help="USE WITH CAUTION: force overwrite of outputs")
    parser.add_argument("--quiet", action="store_true",
                        help="suppress progress meter")
    parser.add_argument("-v", "--version", action="store_true",
                        help="display version information")
    # Parse once; the original parsed the same argument list a second time.
    args = parser.parse_args(args=arguments)
    if args.version:
        print("Version 2015-02-26")
        sys.exit()
    time0 = time()
    if args.modulehelp:
        # modulehelp() is defined elsewhere; whether it exits the process is
        # not visible from this function.
        modulehelp()
    if not args.mvf and not args.test:
        raise RuntimeError("No input file specified with --mvf")
    if not args.out and not args.test:
        # The flag is --out (the message previously said "--outs").
        raise RuntimeError("No output file specified with --out")
    if not args.actions:
        raise RuntimeError("No --actions specified!")
    ## Establish Input MVF
    if args.test:
        # NOTE(review): the fallback is the length of the whole --test string
        # (location field and separator included), not just the alleles.
        ncol = args.testnchar or len(args.test)
    else:
        mvf = MultiVariantFile(args.mvf, 'read')
        ncol = mvf.metadata['ncol']
    ## Create Actionset
    actionset = build_actionset(args.actions, ncol)
    ##TESTING MODE
    if args.test:
        loc, alleles = args.test.split()
        linefail = False
        transformed = False
        #invar = invariant (single character)
        #refvar (all different than reference, two chars)
        #onecov (single coverage, + is second character)
        #onevar (one variable base, + is third character)
        #full = full alleles (all chars)
        if args.verbose:
            print(alleles)
        linetype = get_linetype(alleles)
        sys.stdout.write("MVF Encoding type '{}' detected\n".format(linetype))
        for actionname, actiontype, actionfunc, _ in actionset:
            sys.stdout.write("Applying action {} ({}): ".format(
                actionname, actiontype))
            if actiontype == 'filter':
                if not actionfunc(alleles, linetype):
                    linefail = True
                    sys.stdout.write("Filter Fail\n")
                    break
                sys.stdout.write("Filter Pass\n")
            elif actiontype == 'transform':
                transformed = True
                alleles = actionfunc(alleles, linetype)
                # A transform may change the encoding type of the line.
                linetype = get_linetype(alleles)
                if linetype == 'empty':
                    linefail = True
                    sys.stdout.write("Transform removed all alleles\n")
                    break
                sys.stdout.write("Transform result {}\n".format(alleles))
            elif actiontype == 'location':
                if not actionfunc([int(x) for x in loc.split(':')]):
                    linefail = True
                    sys.stdout.write("Location Fail\n")
                    break
                sys.stdout.write("Location Pass\n")
        if not linefail:
            if transformed:
                if linetype == 'full':
                    alleles = encode_mvfstring(alleles)
                if alleles:
                    test_output = "{}\t{}\n".format(loc, alleles)
                    sys.stdout.write("Final output = {}\n".format(
                        test_output))
                else:
                    sys.stdout.write("Transform removed all alleles\n")
            else:
                sys.stdout.write("No changes applied\n")
                sys.stdout.write("Final output = {}\n".format(args.test))
        sys.exit()
    ## MAIN MODE
    ## Set up file handler
    outmvf = MultiVariantFile(args.out, 'write', overwrite=args.overwrite)
    outmvf.metadata = deepcopy(mvf.metadata)
    ### reprocess header if actions are used that filter columns
    if any(action[0] in ('columns', 'collapsepriority')
           for action in actionset):
        labels = outmvf.metadata['labels'][:]
        for actionname, _, _, actionarg in actionset:
            if actionname == 'columns':
                labels = [labels[x] for x in actionarg]
            elif actionname == 'collapsepriority':
                # Every index after the first argument is dropped.
                # enumerate() replaces the Python-2-only xrange() loop.
                dropped = actionarg[1:]
                labels = [label for idx, label in enumerate(labels)
                          if idx not in dropped]
        oldindices = mvf.get_sample_indices(labels)
        outmvf.metadata['samples'] = {
            i: mvf.metadata['samples'][oldindices[i]]
            for i, _ in enumerate(labels)}
        outmvf.metadata['labels'] = labels[:]
    outmvf.write_data(outmvf.get_header())
    ## End header editing
    linebuffer = []
    nbuffer = 0
    for chrom, pos, allelesets in mvf.iterentries(decode=False):
        linefail = False
        transformed = False
        #invar = invariant (single character)
        #refvar (all different than reference, two chars)
        #onecov (single coverage, + is second character)
        #onevar (one variable base, + is third character)
        #full = full alleles (all chars)
        alleles = allelesets[0]
        linetype = get_linetype(alleles)
        if linetype == 'empty':
            continue
        if args.verbose:
            sys.stdout.write(" {} {}".format(alleles, linetype))
        for _, actiontype, actionfunc, _ in actionset:
            if actiontype == 'filter':
                if not actionfunc(alleles, linetype):
                    linefail = True
            elif actiontype == 'transform':
                transformed = True
                alleles = actionfunc(alleles, linetype)
                linetype = get_linetype(alleles)
                if linetype == 'empty':
                    linefail = True
            elif actiontype == 'location':
                if not actionfunc([chrom, pos]):
                    linefail = True
            # Actions run in the order listed; stop at the first failure.
            if linefail:
                break
        if not linefail and transformed:
            if linetype == 'full':
                alleles = mvf.encode(alleles)
            if not alleles:
                linefail = True
        if not linefail:
            nbuffer += 1
            linebuffer.append((chrom, pos, (alleles,)))
            if args.verbose:
                sys.stdout.write("{}\n".format(alleles))
            # Flush in batches of --linebuffer entries.
            if nbuffer == args.linebuffer:
                outmvf.write_entries(linebuffer)
                linebuffer = []
                nbuffer = 0
        elif args.verbose:
            sys.stdout.write("FAIL\n")
    if linebuffer:
        outmvf.write_entries(linebuffer)
        linebuffer = []
    if not args.quiet:
        print("Completed in {} seconds".format(time() - time0))
    return ''
コード例 #2
0
ファイル: fasta2mvf.py プロジェクト: wum5/mvftools
def main(arguments=sys.argv[1:]):
    """Main method for fasta2mvf.

    Reads a multi-sample FASTA file, groups the sequences by contig and
    sample, and writes one MVF entry per alignment column.

    Args:
        arguments: list of command-line tokens (defaults to ``sys.argv[1:]``
            as captured when this module was imported).

    Returns:
        An empty string on completion.  ``--version`` prints the version and
        exits; a missing ``--fasta``/``--out`` exits through ``parser.error``.
    """
    parser = argparse.ArgumentParser(description="""
    Converts multisample-FASTA to MVF file with filtering """)
    # --fasta/--out are validated after the version check (below) so that
    # "-v" can be used without supplying them.
    parser.add_argument("--fasta", help="input FASTA file")
    parser.add_argument("--out", help="output MVF file")
    parser.add_argument("--contigids", nargs='*',
                        help=("""manually specify one or more contig ids
                                 as ID:NAME"""))
    parser.add_argument("--samplereplace", nargs="*",
                        help="""one or more TAG:NEWLABEL or TAG, items,
                                if TAG found in sample label, replace with
                                NEW (or TAG if NEW not specified)
                                NEW and TAG must each be unique""")
    parser.add_argument("--reflabel", default="REF",
                        help="label for reference sample (default='REF')")
    parser.add_argument("--allelesfrom", default=None,
                        help="""get additional alignment columns
                                from INFO fields (:-separated)""")
    parser.add_argument("--readbuffer", type=int, default=100000,
                        help="number of lines to hold in READ buffer")
    parser.add_argument("--writebuffer", type=int, default=100000,
                        help="number of lines to hold in WRITE buffer")
    parser.add_argument("--fieldsep", default="NONE",
                        choices=['TAB', 'SPACE', 'DBLSPACE',
                                 'COMMA', 'MIXED', 'PIPE', 'NONE'],
                        help="""FASTA field separator; assumes
                                '>database/SEP/accession/SEP/locus'
                                format (default='NONE')""")
    parser.add_argument("--contigfield", type=int,
                        help="""when headers are split by --fieldsep,
                        the 0-based index of the contig id""")
    parser.add_argument("--samplefield", type=int,
                        help="""when headers are split by --fieldsep,
                        the 0-based index of the sample id""")
    parser.add_argument("--overwrite", action="store_true",
                        help="USE WITH CAUTION: force overwrite of outputs")
    parser.add_argument("-v", "--version", action="store_true",
                        help="display version information")
    args = parser.parse_args(args=arguments)
    if args.version:
        print("Version 2015-07-07")
        sys.exit()
    missing = [flag for flag, value in (("--fasta", args.fasta),
                                        ("--out", args.out))
               if value is None]
    if missing:
        parser.error("the following arguments are required: {}".format(
            ", ".join(missing)))
    # 'MIXED' is an accepted choice, so it needs a mapping too; None makes
    # str.split() break on any run of whitespace.
    sepchars = {"PIPE": "|", "TAB": "\t", "SPACE": " ", "DBLSPACE": "  ",
                "COMMA": ",", "MIXED": None, "NONE": None}
    args.fieldsep = sepchars[args.fieldsep]
    mvf = MultiVariantFile(args.out, 'write', overwrite=args.overwrite)
    fasta = {}  # contig -> sample -> (sequence length, sequence)
    # NOTE(review): current_contig is never advanced, so every sequence
    # without usable header fields is filed under contig "UNK0".
    current_contig = 0
    fsamples = []
    fcontigs = []
    for header, seq in fasta_iter(args.fasta):
        header = header.split(args.fieldsep)
        # Header fields are used only when both indices were given, at least
        # three fields are present, and both indices are inside the header
        # (an index equal to len(header) used to raise IndexError).
        use_fields = (
            args.contigfield is not None and args.samplefield is not None
            and len(header) > max(2, args.contigfield, args.samplefield))
        if use_fields:
            contig = header[args.contigfield]
            sample = header[args.samplefield]
        else:
            contig = "UNK{}".format(current_contig)
            sample = header[0]
        if contig not in fcontigs:
            fcontigs.append(contig)
            fasta[contig] = {}
        if sample not in fsamples:
            fsamples.append(sample)
        fasta[contig][sample] = (len(seq), seq)
    # Move the first sample whose label contains --reflabel to the front.
    refindex = None
    if args.reflabel:
        for i, samplename in enumerate(fsamples):
            if args.reflabel in samplename:
                refindex = i
                break
    if refindex:  # index 0 is already in first position
        fsamples.insert(0, fsamples.pop(refindex))
    for i, contig in enumerate(fcontigs):
        mvf.metadata['contigs'][i] = {
            'label': contig,
            'length': max(length for length, _ in fasta[contig].values())}
    mvf.metadata['labels'] = fsamples[:]
    for i, label in enumerate(fsamples):
        mvf.metadata['samples'][i] = {'label': label}
    mvf.metadata['ncol'] = len(mvf.metadata['labels'])
    mvf.metadata['sourceformat'] = 'fasta'
    # WRITE MVF HEADER
    mvf.write_data(mvf.get_header())
    mvfentries = []
    nentry = 0
    for cind, contig in enumerate(fcontigs):
        # A sample missing from this contig is written as all gaps.
        records = [fasta[contig].get(samp, (0, '')) for samp in fsamples]
        for pos in range(mvf.metadata['contigs'][cind]['length']):
            # pos is 0-based, so any pos >= length is past the end of a
            # shorter sequence and becomes a gap.
            mvf_alleles = encode_mvfstring(
                ''.join(seq[pos] if pos < length else '-'
                        for length, seq in records))
            if mvf_alleles:
                mvfentries.append(
                    (cind, pos+1, (mvf_alleles,)))
                nentry += 1
                if nentry == args.writebuffer:
                    mvf.write_entries(mvfentries, encoded=True)
                    mvfentries = []
                    nentry = 0
    if mvfentries:
        # Same pre-encoded entries as the buffered flush above, so pass the
        # same flag.
        mvf.write_entries(mvfentries, encoded=True)
        mvfentries = []
    return ''
コード例 #3
0
ファイル: vcf2mvf.py プロジェクト: wum5/mvftools
def main(arguments=sys.argv[1:]):
    """Main method for vcf2mvf.

    Reads a multi-sample VCF, assigns contig IDs and sample labels, and
    writes each encoded genotype record to an MVF file.

    Args:
        arguments: list of command-line tokens (defaults to ``sys.argv[1:]``
            as captured when this module was imported).

    Returns:
        An empty string on completion.  ``--version`` prints the version and
        exits; a missing ``--vcf``/``--out`` exits through ``parser.error``.
    """
    parser = argparse.ArgumentParser(
        description="""
    Converts multisample-VCF to MVF file with filtering """
    )
    # --vcf/--out are validated after the version check (below) so that
    # "-v" can be used without supplying them.
    parser.add_argument("--vcf", help="input VCF file")
    parser.add_argument("--out", help="output MVF file")
    parser.add_argument("--maskdepth", type=int, default=1, help="below this depth mask with N/n")
    parser.add_argument(
        "--lowdepth",
        type=int,
        default=3,
        help="""below this depth convert to lower case
                              set to 0 to disable""",
    )
    parser.add_argument(
        "--maskqual",
        type=int,
        default=3,
        help="""low quality cutoff, bases replaced by N/-
                             set to 0 to disable""",
    )
    parser.add_argument(
        "--lowqual",
        type=int,
        default=20,
        help="""below this quality convert to lower case
                                set to 0 to disable""",
    )
    parser.add_argument(
        "--contigids",
        nargs="*",
        help=(
            """manually specify one or more contig ids
                                 as ID:NAME"""
        ),
    )
    parser.add_argument(
        "--samplereplace",
        nargs="*",
        help="""one or more TAG:NEWLABEL or TAG, items,
                                if TAG found in sample label, replace with
                                NEW (or TAG if NEW not specified)
                                NEW and TAG must each be unique""",
    )
    parser.add_argument("--reflabel", default="REF", help="label for reference sample (default='REF')")
    parser.add_argument(
        "--allelesfrom",
        default=None,
        help="""get additional alignment columns
                                from INFO fields (:-separated)""",
    )
    parser.add_argument("--linebuffer", type=int, default=100000, help="number of lines to hold in read/write buffer")
    parser.add_argument("--no_autoindex", action="store_true", help="do not automatically index contigs from the VCF")
    parser.add_argument(
        "--fieldsep",
        default="TAB",
        choices=["TAB", "SPACE", "DBLSPACE", "COMMA", "MIXED"],
        help="""VCF field separator (default='TAB')""",
    )
    parser.add_argument("--overwrite", action="store_true", help="USE WITH CAUTION: force overwrite of outputs")
    parser.add_argument("--quiet", action="store_true", help="suppress progress meter")
    parser.add_argument("-v", "--version", action="store_true", help="display version information")
    args = parser.parse_args(args=arguments)
    if args.version:
        print("Version 2015-02-26")
        sys.exit()
    missing = [flag for flag, value in (("--vcf", args.vcf), ("--out", args.out)) if value is None]
    if missing:
        parser.error("the following arguments are required: {}".format(", ".join(missing)))
    sepchars = {"TAB": "\t", "SPACE": " ", "DBLSPACE": "  ", "COMMA": ",", "MIXED": None}
    args.fieldsep = sepchars[args.fieldsep]
    ## ESTABLISH VCF
    vcf = VariantCallFile(args.vcf, indexcontigs=(not args.no_autoindex))
    ## ESTABLISH MVF
    mvf = MultiVariantFile(args.out, "write", overwrite=args.overwrite)
    # PROCESS CONTIG INFO
    contigs = vcf.metadata["contigs"].copy()
    maxcontigid = 0
    newids = set()
    if args.contigids:
        for cid, cname in (x.split(":") for x in args.contigids):
            for tempid in contigs:
                if cname in contigs[tempid]["label"]:
                    # Numeric IDs are stored as ints; any other ID is kept
                    # as the string the user typed.
                    try:
                        cid = int(cid)
                    except ValueError:
                        pass
                    mvf.metadata["contigs"][cid] = contigs[tempid].copy()
                    # Safe while iterating: the loop is left immediately.
                    del contigs[tempid]
                    newids.add(cid)
                    break
        # Automatic IDs start above the largest user-supplied numeric ID.
        for cid in newids:
            try:
                maxcontigid = max(maxcontigid, int(cid) + 1)
            except ValueError:
                continue
    # NOTE(review): this subtracts the *new* IDs from the remaining *old*
    # IDs, so an unclaimed contig whose old ID equals a user-chosen new ID
    # is left out of the output metadata -- confirm this is intended.
    tempids = set(contigs) - newids
    # Sort before pairing so the ID assignment is deterministic (pairing a
    # raw set with the counter depended on set iteration order).
    for newid, tempid in enumerate(sorted(tempids), maxcontigid):
        mvf.metadata["contigs"][newid] = vcf.metadata["contigs"][tempid]
    contig_translate = {mvf.metadata["contigs"][x]["label"]: x for x in mvf.metadata["contigs"]}
    # PROCESS SAMPLE INFO
    samplelabels = [args.reflabel] + vcf.metadata["samples"][:]
    if args.allelesfrom:
        args.allelesfrom = args.allelesfrom.split(":")
        samplelabels += args.allelesfrom
    if args.samplereplace:
        newsample = [x.split(":") if ":" in x else (x, x) for x in args.samplereplace]
        unmatched = list(enumerate(samplelabels))
        for old, new in newsample:
            for j, (i, name) in enumerate(unmatched):
                if old in name:
                    samplelabels[i] = new
                    # Consume the label so a later tag cannot rename it
                    # again.  The old "labelmatched != False" test skipped
                    # this when the match was at position 0 (0 == False).
                    del unmatched[j]
                    break
    mvf.metadata["labels"] = samplelabels[:]
    for i, label in enumerate(samplelabels):
        mvf.metadata["samples"][i] = {"label": label}
    mvf.metadata["ncol"] = len(mvf.metadata["labels"])
    mvf.metadata["sourceformat"] = vcf.metadata["sourceformat"]
    ## WRITE MVF HEADER
    mvf.write_data(mvf.get_header())
    mvfentries = []
    nentry = 0
    for vcfrecord in vcf.iterentries(vars(args)):
        mvf_alleles = encode_mvfstring("".join(vcfrecord["genotypes"]))
        if mvf_alleles:
            # Contigs without a translated ID keep their original label.
            mvfentries.append(
                (contig_translate.get(vcfrecord["contig"], vcfrecord["contig"]), vcfrecord["coord"], (mvf_alleles,))
            )
            nentry += 1
            if nentry == args.linebuffer:
                mvf.write_entries(mvfentries, encoded=True)
                mvfentries = []
                nentry = 0
    if mvfentries:
        # Same pre-encoded entries as the buffered flush above, so pass the
        # same flag.
        mvf.write_entries(mvfentries, encoded=True)
        mvfentries = []
    return ""
コード例 #4
0
ファイル: mvf_test_encode.py プロジェクト: peaselab/mvftools
    'ATTTTTTTTT',
    'A---------',
    'ATCCCCCCCC',
    'A-CCCCCCCC',
    'ATGCCCCCCC',
    'AGCGGGGGGG',
    'AT--------',
    'A-T-------',
    'A--T------',
]

# Round-trip check of encode_mvfstring()/decode_mvfstring().
# TEST_STRINGS, NCOL, choices, encode_mvfstring and decode_mvfstring are
# defined or imported outside the part of the file visible here
# (choices is presumably random.choices -- confirm against the imports).

# Number of random allele strings to generate.
NRAND = 100000
# Each random string has 10 characters drawn from "ATGCX-".
RANDOM_STRINGS = [''.join(choices("ATGCX-", k=10)) for _ in range(NRAND)]
# Hand-written cases: print the input, its encoding, the decoded result and
# whether the round trip reproduced the input.
for x in TEST_STRINGS:
    print(x)
    y = encode_mvfstring(x)
    print(y)
    z = decode_mvfstring(y, NCOL)
    print(z)
    print(x == z)
    print("==========")

# Counter for the random cases; it is not updated in the lines visible here
# (the loop below may continue past this excerpt).
RANDOM_PASS = 0
# Random cases: same encode/decode round trip, with the printing disabled.
for x in RANDOM_STRINGS:
    #print(x)
    y = encode_mvfstring(x)
    #print(y)
    z = decode_mvfstring(y, NCOL)
    #print(z)
    #print(x == z)
    #print("==========")
コード例 #5
0
ファイル: geno2mvf.py プロジェクト: wum5/mvftools
def main(arguments=sys.argv[1:]):
    """Main method for geno2mvf.

    Reads a GATK genotype (.geno) file, assigns contig IDs and sample
    labels, and writes each encoded genotype record to an MVF file.

    Args:
        arguments: list of command-line tokens (defaults to ``sys.argv[1:]``
            as captured when this module was imported).

    Returns:
        An empty string on completion.  ``--version`` prints the version and
        exits; a missing ``--geno``/``--out`` exits through ``parser.error``.
    """
    parser = argparse.ArgumentParser(description="""
    Converts GATK Genotype Format to MVF file with some filters """)
    # --geno/--out are validated after the version check (below) so that
    # "-v" can be used without supplying them.
    parser.add_argument("--geno", help="input .geno file")
    parser.add_argument("--out", help="output MVF file")
    parser.add_argument("--contigids", nargs='*',
                        help=("manually specify one or more contig ids"
                              " as ID:NAME"))
    parser.add_argument("--samplereplace", nargs="*",
                        help="""one or more TAG:NEWLABEL or TAG, items,
                                if TAG found in sample label, replace with
                                NEW (or TAG if NEW not specified)
                                NEW and TAG must each be unique""")
    # NOTE(review): --reflabel is accepted but not read in this function.
    parser.add_argument("--reflabel", default="REF",
                        help="""label of the reference sample
                                (default is first entry)""")
    parser.add_argument("--no_autoindex", action="store_true",
                        help="do not automatically index contigs")
    parser.add_argument("--fieldsep", default="SPACE",
                        choices=['TAB', 'SPACE', 'DBLSPACE', 'COMMA', 'MIXED'],
                        help="""entry field separator (default='SPACE')""")
    parser.add_argument("--linebuffer", type=int, default=100000,
                        help="number of lines to hold in read/write buffer")
    parser.add_argument("--overwrite", action="store_true",
                        help="USE WITH CAUTION: force overwrite of outputs")
    parser.add_argument("--quiet", action="store_true",
                        help="suppress progress meter")
    parser.add_argument("-v", "--version", action="store_true",
                        help="display version information")
    args = parser.parse_args(args=arguments)
    if args.version:
        print("Version 2015-02-01: Initial Public Release")
        sys.exit()
    missing = [flag for flag, value in (("--geno", args.geno),
                                        ("--out", args.out))
               if value is None]
    if missing:
        parser.error("the following arguments are required: {}".format(
            ", ".join(missing)))
    sepchars = {"TAB": "\t", "SPACE": " ", "DBLSPACE": "  ",
                "COMMA": ",", "MIXED": None}
    args.fieldsep = sepchars[args.fieldsep]
    ## ESTABLISH GENO
    geno = GenoFile(args.geno, indexcontigs=(not args.no_autoindex))
    ## ESTABLISH MVF
    mvf = MultiVariantFile(args.out, 'write', overwrite=args.overwrite)
    # PROCESS CONTIG INFO
    contigs = geno.metadata['contigs'].copy()
    maxcontigid = 0
    newids = set()
    if args.contigids:
        for cid, cname in (x.split(':') for x in args.contigids):
            for tempid in contigs:
                if cname in contigs[tempid]['label']:
                    # Numeric IDs are stored as ints; any other ID is kept
                    # as the string the user typed.
                    try:
                        cid = int(cid)
                    except ValueError:
                        pass
                    mvf.metadata['contigs'][cid] = contigs[tempid].copy()
                    # Safe while iterating: the loop is left immediately.
                    del contigs[tempid]
                    newids.add(cid)
                    break
        # Automatic IDs start above the largest user-supplied numeric ID.
        for cid in newids:
            try:
                maxcontigid = max(maxcontigid, int(cid) + 1)
            except ValueError:
                continue
    # NOTE(review): this subtracts the *new* IDs from the remaining *old*
    # IDs, so an unclaimed contig whose old ID equals a user-chosen new ID
    # is left out of the output metadata -- confirm this is intended.
    tempids = set(contigs) - newids
    # Sort before pairing so the ID assignment is deterministic (pairing a
    # raw set with the counter depended on set iteration order).
    for newid, tempid in enumerate(sorted(tempids), maxcontigid):
        mvf.metadata['contigs'][newid] = geno.metadata['contigs'][tempid]
    contig_translate = {mvf.metadata['contigs'][x]['label']: x
                        for x in mvf.metadata['contigs']}
    # PROCESS SAMPLE INFO
    samplelabels = geno.metadata['samples'][:]
    if args.samplereplace:
        newsample = [x.split(':') if ':' in x else (x, x)
                     for x in args.samplereplace]
        unmatched = list(enumerate(samplelabels))
        for old, new in newsample:
            for j, (i, name) in enumerate(unmatched):
                if old in name:
                    samplelabels[i] = new
                    # Consume the label so a later tag cannot rename it
                    # again.  The old "labelmatched != False" test skipped
                    # this when the match was at position 0 (0 == False).
                    del unmatched[j]
                    break
    mvf.metadata['labels'] = samplelabels[:]
    for i, label in enumerate(samplelabels):
        mvf.metadata['samples'][i] = {'label': label}
    mvf.metadata['ncol'] = len(mvf.metadata['labels'])
    mvf.metadata['sourceformat'] = geno.metadata['sourceformat']
    ## WRITE MVF HEADER
    mvf.write_data(mvf.get_header())
    mvfentries = []
    nentry = 0
    for record in geno.iterentries(vars(args)):
        mvf_alleles = encode_mvfstring(''.join(record['genotypes']))
        if mvf_alleles:
            # The allele string goes in a 1-tuple, the same entry layout the
            # other MVF converters use; it was stored bare here before.
            mvfentries.append(
                (contig_translate.get(record['contig'], record['contig']),
                 record['coord'], (mvf_alleles,)))
            nentry += 1
            if nentry == args.linebuffer:
                mvf.write_entries(mvfentries, encoded=True)
                mvfentries = []
                nentry = 0
    if mvfentries:
        # Same pre-encoded entries as the buffered flush above, so pass the
        # same flag.
        mvf.write_entries(mvfentries, encoded=True)
        mvfentries = []
    return ''
コード例 #6
0
ファイル: maf2mvf.py プロジェクト: wum5/mvftools
def main(arguments=sys.argv[1:]):
    """Main method for maf2mvf.

    Reads alignment blocks from a MAF file and writes one MVF entry per
    alignment column, with the reference sample (``--reftag``) first.

    Args:
        arguments: list of command-line tokens (defaults to ``sys.argv[1:]``
            as captured when this module was imported).

    Returns:
        An empty string on completion.

    Raises:
        RuntimeError: if ``--sampletags`` or ``--reftag`` is missing, or if
            the reference tag is not one of the sample tags.
    """
    parser = argparse.ArgumentParser(description="""
    Converts Multiple Alignment Files to MVF file with some filters """)
    parser.add_argument("--maf", help="input MAF file")
    parser.add_argument("--out", help="output MVF file")
    parser.add_argument("--reftag", help="old reference tag")
    # NOTE(review): --mvfreflabel and --contigids are accepted but not read
    # in this function.
    parser.add_argument("--mvfreflabel", default="REF",
                        help="new label for reference sample (default='REF')")
    parser.add_argument("--contigids", nargs='*',
                        help=("manually specify one or more contig ids"
                              " as ID:NAME"))
    parser.add_argument("--sampletags", nargs="*",
                        help="""one or more TAG:NEWLABEL or TAG, items,
                                if TAG found in sample label, replace with
                                NEW (or TAG if NEW not specified)
                                NEW and TAG must each be unique""")
    parser.add_argument("--linebuffer", type=int, default=100000,
                        help="number of lines to hold in read/write buffer")
    parser.add_argument("--overwrite", action="store_true")
    args = parser.parse_args(args=arguments)
    # Validate the sample options before any file is opened; previously a
    # missing option surfaced later as an opaque TypeError/ValueError.
    if not args.sampletags:
        raise RuntimeError("No sample tags specified with --sampletags")
    if not args.reftag:
        raise RuntimeError("No reference tag specified with --reftag")
    # PROCESS SAMPLE INFO
    # Only the TAG part of each TAG:NEWLABEL item is used as the label.
    samplelabels = [tag.split(':')[0] for tag in args.sampletags]
    if args.reftag not in samplelabels:
        raise RuntimeError(
            "Reference tag '{}' given with --reftag is not among the "
            "--sampletags".format(args.reftag))
    # Move the reference sample to the first column.
    samplelabels.remove(args.reftag)
    samplelabels.insert(0, args.reftag)
    ## ESTABLISH MAF
    maf = MultiAlignFile(args)
    ## ESTABLISH MVF
    mvf = MultiVariantFile(args.out, 'write', overwrite=args.overwrite)
    # PROCESS CONTIG INFO
    # Contig-ID remapping is not implemented for MAF input: every entry is
    # written to contig 1.
    contig_translate = {1: 1}
    mvf.metadata['labels'] = samplelabels[:]
    for i, label in enumerate(samplelabels):
        mvf.metadata['samples'][i] = {'label': label}
    mvf.metadata['ncol'] = len(mvf.metadata['labels'])
    mvf.metadata['sourceformat'] = maf.metadata['sourceformat']
    ## WRITE MVF HEADER
    mvf.write_data(mvf.get_header())
    mvfentries = []
    nentry = 0
    for pos, length, msa in maf:
        # Samples absent from this alignment block are filled with gaps.
        for s in samplelabels:
            if s not in msa:
                msa[s] = '-'*length
        msa['contig'] = 1
        for i in range(length):
            mvf_alleles = encode_mvfstring(
                ''.join(msa[s][i].strip() for s in samplelabels))
            if mvf_alleles:
                mvfentries.append(
                    (contig_translate.get(msa['contig']),
                     pos+i, (mvf_alleles,)))
                nentry += 1
                if nentry == args.linebuffer:
                    mvf.write_entries(mvfentries, encoded=True)
                    mvfentries = []
                    nentry = 0
    if mvfentries:
        # Same pre-encoded entries as the buffered flush above, so pass the
        # same flag.
        mvf.write_entries(mvfentries, encoded=True)

    return ''