예제 #1
0
def maf2Dict(multipleAlignment, id = None):
    """
    Summarise one multiple-alignment block as a plain dictionary.

    The first record of the block supplies the coordinates: its id is
    split on "." and the second field is taken as the chromosome, while
    its 'start' and 'size' annotations give the genomic interval.

    multipleAlignment -- alignment block; it is indexed, iterated over
                         record by record, and rendered with
                         .format("maf")
    id                -- accepted but never read by this function

    Returns a dict with the keys 'key', 'genome_start', 'genome_stop',
    'chromosome', 'speciesAligned', 'nSpeciesAligned' and 'maf'.
    """
    reference = multipleAlignment[0]
    chromosome = reference.id.split(".")[1]
    start = reference.annotations['start']
    stop = start + reference.annotations['size']
    block_key = ".".join((chromosome, str(start)))

    # Unique leading id fields (the part before the first "."); the list
    # order is whatever the intermediate set yields.
    species = list({record.id.partition(".")[0]
                    for record in multipleAlignment})

    # Drop the "# generated by Biopython" line and the last two
    # characters of the MAF text before serialising -- saves some space.
    maf_text = multipleAlignment.format("maf")
    maf_text = maf_text.replace("# generated by Biopython\n", "")
    serialized = encoding.dumps(maf_text[:-2])

    return {
        'key': block_key,
        'genome_start': start,
        'genome_stop': stop,
        'chromosome': chromosome,
        'speciesAligned': species,
        'nSpeciesAligned': len(species),
        'maf': serialized,
    }
예제 #2
0
def maf2Dict(multipleAlignment, id=None):
    """
    Convert a multiple-alignment block into a dictionary of summary
    fields plus the serialised MAF text of the block.

    Coordinates come from the first record only: the chromosome is the
    second "."-separated field of its id, the start is its 'start'
    annotation and the stop is start plus its 'size' annotation.

    The ``id`` argument is part of the signature but is not used here.
    """
    first = multipleAlignment[0]
    chrom_name = first.id.split(".")[1]
    begin = first.annotations['start']
    end = begin + first.annotations['size']
    lookup_key = chrom_name + "." + str(begin)

    # Collect each distinct id prefix (text before the first ".") once.
    prefixes = set()
    for record in multipleAlignment:
        prefixes.add(record.id.split(".", 1)[0])
    species = list(prefixes)

    # Remove the Biopython banner line, trim the final two characters
    # (to save some space), then serialise the remaining text.
    rendered = multipleAlignment.format("maf")
    without_banner = rendered.replace("# generated by Biopython\n", "")
    maf_payload = encoding.dumps(without_banner[:-2])

    data = {}
    data['key'] = lookup_key
    data['genome_start'] = begin
    data['genome_stop'] = end
    data['chromosome'] = chrom_name
    data['speciesAligned'] = species
    data['nSpeciesAligned'] = len(species)
    data['maf'] = maf_payload
    return data
예제 #3
0
    in a dictionary
    """

    data = dict()

    chrom = multipleAlignment[0].id.split(".")[1]
    genome_start = multipleAlignment[0].annotations['start']
    genome_stop =  genome_start + multipleAlignment[0].annotations['size']
    data['key'] = chrom + "." + str(genome_start)
    data['genome_start'] = genome_start
    data['genome_stop'] = genome_stop
    data['chromosome'] = chrom
    data['speciesAligned'] = list(set(record.id.split(".")[0] \
                                                for record in multipleAlignment))
    data['nSpeciesAligned'] = len(data['speciesAligned'])
    data['maf'] = encoding.dumps(multipleAlignment.format("maf").replace( \
        "# generated by Biopython\n", "")[:-2]) #save some space

    return data

def maf_dict_generator(mafFile):
    """
    Lazily yield one maf2Dict() dictionary per alignment block found in
    the MAF file at path ``mafFile``.

    Each block is passed to maf2Dict together with its zero-based
    position in the file.  The file is opened when iteration starts and
    stays open while blocks are being consumed.
    """
    with open(mafFile, 'r') as handle:
        blocks = Bio.AlignIO.parse(handle, "maf")
        for index, block in enumerate(blocks):
            yield maf2Dict(block, index)

if __name__ == "__main__":
    # Every command-line argument is treated as the path of a MAF file.
    mafFiles = sys.argv[1:]
    for mafFile in mafFiles:
        for converted in maf_dict_generator(mafFile):
            # Parenthesized single-argument print: identical output under
            # the Python 2 print statement, and valid syntax on Python 3
            # (the bare statement form is a SyntaxError there).
            print(encoding.dumps(converted))
예제 #4
0
    data = dict()

    chrom = multipleAlignment[0].id.split(".")[1]
    genome_start = multipleAlignment[0].annotations['start']
    genome_stop = genome_start + multipleAlignment[0].annotations['size']
    data['key'] = chrom + "." + str(genome_start)
    data['genome_start'] = genome_start
    data['genome_stop'] = genome_stop
    data['chromosome'] = chrom
    data['speciesAligned'] = list(set(record.id.split(".")[0] \
                                                for record in multipleAlignment))
    data['nSpeciesAligned'] = len(data['speciesAligned'])
    data['maf'] = encoding.dumps(multipleAlignment.format("maf").replace( \
        "# generated by Biopython\n", "")[:-2]) #save some space

    return data


def maf_dict_generator(mafFile):
    """
    Generate a dictionary (built by maf2Dict) for every alignment block
    parsed from the MAF file ``mafFile``.

    The block's zero-based index within the file is handed to maf2Dict
    as its second argument.
    """
    with open(mafFile, 'r') as maf_stream:
        block_number = 0
        for alignment in Bio.AlignIO.parse(maf_stream, "maf"):
            yield maf2Dict(alignment, block_number)
            block_number += 1


if __name__ == "__main__":
    # Command-line arguments are the MAF files to convert, in order.
    mafFiles = sys.argv[1:]
    for mafFile in mafFiles:
        for converted in maf_dict_generator(mafFile):
            # Use the parenthesized form so the script parses on Python 3
            # as well; with a single argument it prints exactly what the
            # Python 2 print statement printed.
            print(encoding.dumps(converted))